Clustering Comparision

Preamble

import pandas as pd
import numpy as np
import scanpy as sc
from sklearn.metrics.cluster import normalized_mutual_info_score, adjusted_rand_score
from sklearn.metrics import homogeneity_score, completeness_score, fowlkes_mallows_score, silhouette_score, davies_bouldin_score, calinski_harabasz_score
from sklearn.metrics.cluster import contingency_matrix, pair_confusion_matrix
from src.utils import sankey_plot
from sklearn.decomposition import PCA
import kaleido
from sklearn.preprocessing import StandardScaler
import plotly.io as pio
import matplotlib.pyplot as plt
import seaborn as sns
DIR = 'Data/'
DATASET_NAMES = ['PBMC1', 'PBMC2', 'PBMC3','PBMC4']
TOOLS = ['monocle', 'scanpy', 'scvi-tools', 'seurat', 'COTAN']
PARAMS_TUNING = ['default', 'celltypist', 'antibody']
def compute_scores(dir, dataset, labels_df, labels_matched, ground_truth_labels):
    scores = {}
    scores['NMI'] = {}
    scores['ARI'] = {}
    scores['homogeneity'] = {}
    scores['completeness'] = {}
    scores['fowlkes_mallows'] = {}
    scores['precision'] = {}
    scores['recall'] = {}
    
    for tool in TOOLS:
        scores['NMI'][tool] = normalized_mutual_info_score(labels_pred=labels_df['cluster_'+tool], labels_true=labels_df[f'cluster_{ground_truth_labels}'], average_method='arithmetic')
        scores['ARI'][tool] = adjusted_rand_score(labels_pred=labels_df['cluster_'+tool], labels_true=labels_df[f'cluster_{ground_truth_labels}'])
        scores['homogeneity'][tool] = homogeneity_score(labels_pred=labels_df['cluster_'+tool], labels_true=labels_df[f'cluster_{ground_truth_labels}'])
        scores['completeness'][tool] = completeness_score(labels_pred=labels_df['cluster_'+tool], labels_true=labels_df[f'cluster_{ground_truth_labels}'])
        scores['fowlkes_mallows'][tool] = fowlkes_mallows_score(labels_pred=labels_df['cluster_'+tool], labels_true=labels_df[f'cluster_{ground_truth_labels}'])
        sc = pair_confusion_matrix(labels_pred=labels_df['cluster_'+tool], labels_true=labels_df[f'cluster_{ground_truth_labels}'])
        TP = sc[1,1]
        FP = sc[0,1]
        FN = sc[1,0]
        P_score = TP/(TP+FP)
        scores['precision'][tool] = P_score
        scores['recall'][tool] = TP/(TP+FN)
        
        
        
        
    scores_df = pd.DataFrame(scores)
    scores_df.to_csv(f'{dir}{dataset}/scores_{labels_matched}_{ground_truth_labels}.csv')
    scores_df.to_latex(f'{dir}{dataset}/scores_{labels_matched}_{ground_truth_labels}.tex')
    display(scores_df)


def print_scores(dataset,tuning):
    
    
    # concat tools labels
    labels_df = pd.read_csv(f'{DIR}{dataset}/COTAN/{tuning}/clustering_labels.csv', index_col=0)
    labels_df.rename(columns={"cluster": "cluster_COTAN"}, inplace=True)
    #print("labels_df size")
    #print(labels_df.shape)
    for tool in [t for t in TOOLS if t != 'COTAN']:
        tool_labels_df = pd.read_csv(f'{DIR}{dataset}/{tool}/{tuning}/clustering_labels.csv', index_col=0)
        labels_df = labels_df.merge(tool_labels_df, how='inner', on='cell')
        labels_df.rename(columns={"cluster": f"cluster_{tool}"}, inplace=True)
    #    print("labels_df size"+tool)
    #    print(labels_df.shape)
    
    # load and concat celltypist labels
    celltypist_df = pd.read_csv(f'{DIR}{dataset}/celltypist/celltypist_labels.csv', index_col=0)
    celltypist_df.index = celltypist_df.index.str[:-2]
    celltypist_df = labels_df.merge(celltypist_df, how='inner', on='cell')
    celltypist_df.rename(columns={"cluster.ids": f"cluster_celltypist"}, inplace=True)
    celltypist_mapping_df = pd.read_csv(f'{DIR}{dataset}/celltypist/celltypist_mapping.csv', index_col=0)
    #print("celltypist_df size")
    #print(celltypist_df.shape)
    
    # load and concat protein surface labels
    antibody_df = pd.read_csv(f'{DIR}{dataset}/antibody_annotation/antibody_labels_postproc.csv', index_col=0)
    antibody_df = labels_df.merge(antibody_df, how='inner', on='cell')
    antibody_df.rename(columns={"cluster.ids": f"cluster_antibody"}, inplace=True)
    antibody_mapping_df = pd.read_csv(f'{DIR}{dataset}/antibody_annotation/antibody_mapping.csv', index_col=1, encoding='latin1')
    #print("antibody_df size")
    #print(antibody_df.shape)

    # read dataset
    adata = sc.read_10x_mtx(
        f'{DIR}{dataset}/filtered/10X/',
        var_names='gene_symbols',
        cache=False
    )
    # keep only labelled cells
    adata.var_names_make_unique()
    subset_cells = adata.obs_names.isin(labels_df.index)
    adata = adata[subset_cells, :]

    mito_genes = adata.var_names.str.startswith('MT-')
    # for each cell compute fraction of counts in mito genes vs. all genes
    # the `.A1` is only necessary as X is sparse (to transform to a dense array after summing)
    adata.obs['percent_mito'] = np.sum(adata[:, mito_genes].X, axis=1).A1 / np.sum(adata.X, axis=1).A1
    # add the total counts per cell as observations-annotation to adata
    adata.obs['n_counts'] = adata.X.sum(axis=1).A1

    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    sc.pp.highly_variable_genes(adata, min_mean=0.00125, max_mean=3, min_disp=0.5)
    adata.raw = adata
    adata = adata[:, adata.var.highly_variable]
    #sc.pp.regress_out(adata, ['n_counts', 'percent_mito'])
    sc.pp.scale(adata, max_value=10)
    sc.tl.pca(adata, svd_solver='arpack',n_comps=20)
    pca_matrix = adata.obsm['X_pca']
    scaler = StandardScaler()
    scaled_pca_matrix = scaler.fit_transform(pca_matrix)

    #Custers number
    
    df = {}
    for tool in TOOLS:
        df[tool] = labels_df[f'cluster_{tool}'].unique().shape[0]
    df_size = pd.DataFrame(df, index=[0])
    display(f'{dataset} - number of clusters')
    display(df_size)

    # compute silhouette, Calinski_Harabasz and davies_bouldin scores with scaled PCA    
    silhouette = {}
    Calinski_Harabasz = {}
    davies_bouldin = {}
    for tool in TOOLS:
        silhouette[tool] = silhouette_score(scaled_pca_matrix, labels_df[f'cluster_{tool}'])
        Calinski_Harabasz[tool] =  calinski_harabasz_score(scaled_pca_matrix, labels_df[f'cluster_{tool}'])
        davies_bouldin[tool] = davies_bouldin_score(scaled_pca_matrix, labels_df[f'cluster_{tool}'])
    if tuning=='celltypist':
        silhouette['celltypist'] = silhouette_score(scaled_pca_matrix, celltypist_df[f'cluster_celltypist'])
        Calinski_Harabasz['celltypist'] = calinski_harabasz_score(scaled_pca_matrix, celltypist_df[f'cluster_celltypist'])
        davies_bouldin['celltypist'] = davies_bouldin_score(scaled_pca_matrix, celltypist_df[f'cluster_celltypist'])
    elif tuning=='antibody':
        silhouette['antibody'] = silhouette_score(scaled_pca_matrix, antibody_df[f'cluster_antibody'])
        Calinski_Harabasz['antibody'] = calinski_harabasz_score(scaled_pca_matrix, antibody_df[f'cluster_antibody'])
        davies_bouldin['antibody'] = davies_bouldin_score(scaled_pca_matrix, antibody_df[f'cluster_antibody'])
        
    silhouette_df = pd.DataFrame(silhouette, index=[0])
    silhouette_df.to_csv(f'{DIR}{dataset}/{tuning}_silhouette.csv')
    silhouette_df.to_latex(f'{DIR}{dataset}/{tuning}_silhouette.tex')
    display(f'{dataset} - Silhuette (higher is better)')
    display(silhouette_df)

    Calinski_Harabasz_df = pd.DataFrame(Calinski_Harabasz, index=[0])
    Calinski_Harabasz_df.to_csv(f'{DIR}{dataset}/{tuning}_Calinski_Harabasz.csv')
    Calinski_Harabasz_df.to_latex(f'{DIR}{dataset}/{tuning}_Calinski_Harabasz.tex')
    display(f'{dataset} - Calinski_Harabasz (higher is better)')
    display(Calinski_Harabasz_df)

    davies_bouldin_df = pd.DataFrame(davies_bouldin, index=[0])
    davies_bouldin_df.to_csv(f'{DIR}{dataset}/{tuning}_davies_bouldin.csv')
    davies_bouldin_df.to_latex(f'{DIR}{dataset}/{tuning}_davies_bouldin.tex')
    display(f'{dataset} - davies_bouldin (lower is better)')
    display(davies_bouldin_df)

    # compute silhouette, Calinski_Harabasz and davies_bouldin scores with cellTypist probability
    celltypist_prob_df = pd.read_csv(f'{DIR}{dataset}/celltypist/Immune_All_Low_probability_matrix.csv', index_col=0)
    #labels_df = pd.read_csv(f'{DIR}{dataset}/COTAN/{tuning}/clustering_labels.csv', index_col=0)
    celltypist_prob_df.index = celltypist_prob_df.index.str[:-2]
    subset_cells = celltypist_prob_df.index.isin(labels_df.index)
    celltypist_prob_df = celltypist_prob_df[subset_cells]
    
    pca = PCA(n_components=20,svd_solver='arpack')
    pca_data = pca.fit_transform(celltypist_prob_df)
    df_prob = pd.DataFrame(pca_data)
    df_prob.index = celltypist_prob_df.index
    scaler = StandardScaler()
    scaled_pca_data = pd.DataFrame(scaler.fit_transform(df_prob))
    scaled_pca_data.index = celltypist_prob_df.index
    
    silhouette = {}
    Calinski_Harabasz = {}
    davies_bouldin = {}
    for tool in TOOLS:
        silhouette[tool] = silhouette_score(scaled_pca_data, labels_df[f'cluster_{tool}'])
        Calinski_Harabasz[tool] =  calinski_harabasz_score(scaled_pca_data, labels_df[f'cluster_{tool}'])
        davies_bouldin[tool] = davies_bouldin_score(scaled_pca_data, labels_df[f'cluster_{tool}'])
    if tuning=='celltypist':
        silhouette['celltypist'] = silhouette_score(scaled_pca_data, celltypist_df[f'cluster_celltypist'])
        Calinski_Harabasz['celltypist'] = calinski_harabasz_score(scaled_pca_data, celltypist_df[f'cluster_celltypist'])
        davies_bouldin['celltypist'] = davies_bouldin_score(scaled_pca_data, celltypist_df[f'cluster_celltypist'])
    elif tuning=='antibody':
        silhouette['antibody'] = silhouette_score(scaled_pca_data, antibody_df[f'cluster_antibody'])
        Calinski_Harabasz['antibody'] = calinski_harabasz_score(scaled_pca_matrix, antibody_df[f'cluster_antibody'])
        davies_bouldin['antibody'] = davies_bouldin_score(scaled_pca_matrix, antibody_df[f'cluster_antibody'])
        
    silhouette_df = pd.DataFrame(silhouette, index=[0])
    silhouette_df.to_csv(f'{DIR}{dataset}/{tuning}_silhouette_fromProb.csv')
    silhouette_df.to_latex(f'{DIR}{dataset}/{tuning}_silhouette_fromProb.tex')
    display(f'{dataset} - Silhuette from Prob. (higher is better)')
    display(silhouette_df)

    Calinski_Harabasz_df = pd.DataFrame(Calinski_Harabasz, index=[0])
    Calinski_Harabasz_df.to_csv(f'{DIR}{dataset}/{tuning}_Calinski_Harabasz_fromProb.csv')
    Calinski_Harabasz_df.to_latex(f'{DIR}{dataset}/{tuning}_Calinski_Harabasz_fromProb.tex')
    display(f'{dataset} - Calinski_Harabasz from Prob. (higher is better)')
    display(Calinski_Harabasz_df)

    davies_bouldin_df = pd.DataFrame(davies_bouldin, index=[0])
    davies_bouldin_df.to_csv(f'{DIR}{dataset}/{tuning}_davies_bouldin_fromProb.csv')
    davies_bouldin_df.to_latex(f'{DIR}{dataset}/{tuning}_davies_bouldin_fromProb.tex')
    display(f'{dataset} - davies_bouldin  from Prob. (lower is better)')
    display(davies_bouldin_df)

    display(f'{dataset} - matching {tuning} labels' if tuning != 'default' else f'{dataset} - default labels')

    # compute scores comparing each tool labels with celltypist labels
    if tuning == 'celltypist' or tuning == 'default':
        compute_scores(DIR, dataset, celltypist_df, tuning, 'celltypist')
        labels = []
        labels_titles = []
        for tool in TOOLS:
            labels.append(celltypist_df[f'cluster_{tool}'].to_list())
            labels_titles.append(tool)
        labels.append(celltypist_df[f'cluster_celltypist'].map(celltypist_mapping_df['go'].to_dict()).to_list())
        labels_titles.append('celltypist')
        title = f'{dataset} - matching {tuning} labels' if tuning != 'default' else f'{dataset} - default labels'
        sankey_plot(labels=labels, labels_titles=labels_titles, title=title, path=f'{DIR}{dataset}/{tuning}_celltypist.html')
    
    # compute scores comparing each tool labels with protein labels
    if tuning == 'antibody' or tuning == 'default':
        compute_scores(DIR, dataset, antibody_df, tuning, 'antibody')
        labels = []
        labels_titles = []
        for tool in TOOLS:
            labels.append(antibody_df[f'cluster_{tool}'].to_list())
            labels_titles.append(tool)
        labels.append(antibody_df[f'cluster_antibody'].map(antibody_mapping_df['go'].to_dict()).to_list())
        labels_titles.append('antibody')
        title = f'{dataset} - matching {tuning} labels' if tuning != 'default' else f'{dataset} - default labels'
        sankey_plot(labels=labels, labels_titles=labels_titles, title=title, path=f'{DIR}{dataset}/{tuning}_antibody.html')
def print_clustering_data(dataset,tuning):
    # concat tools labels
    labels_df = pd.read_csv(f'{DIR}{dataset}/COTAN/{tuning}/clustering_labels.csv', index_col=0)
    labels_df.rename(columns={"cluster": "cluster_COTAN"}, inplace=True)
    display(f'Initial COTAN cluster number:')
    display(labels_df.cluster_COTAN.unique().shape[0])
    #print("labels_df size")
    #print(labels_df.shape)
    for tool in [t for t in TOOLS if t != 'COTAN']:
        tool_labels_df = pd.read_csv(f'{DIR}{dataset}/{tool}/{tuning}/clustering_labels.csv', index_col=0)
        display(f'Initial {tool} cluster number:')
        display(labels_df[labels_df.columns[-1]].unique().shape[0])
        labels_df = labels_df.merge(tool_labels_df, how='inner', on='cell')
        labels_df.rename(columns={"cluster": f"cluster_{tool}"}, inplace=True)
    #    print("labels_df size"+tool)
    #    print(labels_df.shape)
    
    if tuning == 'celltypist' or tuning == 'default':
    # load and concat celltypist labels
        celltypist_df = pd.read_csv(f'{DIR}{dataset}/celltypist/celltypist_labels.csv', index_col=0)
        celltypist_df.index = celltypist_df.index.str[:-2]
        celltypist_df = labels_df.merge(celltypist_df, how='inner', on='cell')
        celltypist_df.rename(columns={"cluster.ids": f"cluster_celltypist"}, inplace=True)
        celltypist_mapping_df = pd.read_csv(f'{DIR}{dataset}/celltypist/celltypist_mapping.csv', index_col=0)
        #print("celltypist_df size")
        #print(celltypist_df.shape)
        labels_cluster_celltypist = np.unique(celltypist_df["cluster_celltypist"])
        for tool in TOOLS:
            labels_cluster_tool = np.unique(celltypist_df[f'cluster_{tool}'])
            cm =contingency_matrix(celltypist_df["cluster_celltypist"], celltypist_df[f'cluster_{tool}'])
            cm = pd.DataFrame(cm,index=labels_cluster_celltypist,columns=labels_cluster_tool)
            display(f'{dataset} - contingency_matrix (rows: cellTypist - cols: {tool})')
            display(cm)

    if tuning == 'antibody' or tuning == 'default':
        #load and concat protein surface labels
        antibody_df = pd.read_csv(f'{DIR}{dataset}/antibody_annotation/antibody_labels_postproc.csv', index_col=0)
        display("Initial antibody cell/cluster table:")
        display(antibody_df["cluster.ids"].value_counts())
        antibody_df = labels_df.merge(antibody_df, how='inner', on='cell')
        antibody_df.rename(columns={"cluster.ids": f"cluster_antibody"}, inplace=True)
        
        antibody_mapping_df = pd.read_csv(f'{DIR}{dataset}/antibody_annotation/antibody_mapping.csv', index_col=1, encoding='latin1')
        labels_cluster_antybody = np.unique(antibody_df["cluster_antibody"])
        for tool in TOOLS:
            labels_cluster_tool = np.unique(antibody_df[f'cluster_{tool}'])
            cm =contingency_matrix(antibody_df["cluster_antibody"], antibody_df[f'cluster_{tool}'])
            cm = pd.DataFrame(cm,index=labels_cluster_antybody,columns=labels_cluster_tool)
            display(f'{dataset} - contingency_matrix (rows: antibody - cols: {tool})')
            display(cm)
        

Data summary information

Default parameters

print_clustering_data(tuning = 'default',dataset="PBMC1")
'Initial COTAN cluster number:'
14
'Initial monocle cluster number:'
14
'Initial scanpy cluster number:'
3
'Initial scvi-tools cluster number:'
18
'Initial seurat cluster number:'
13
'PBMC1 - contingency_matrix (rows: cellTypist - cols: monocle)'
1 2 3
1 8 970 1
2 943 0 0
3 47 0 0
4 0 78 0
5 309 0 0
6 0 0 142
7 82 0 0
8 278 0 1
9 81 0 0
10 0 171 0
11 70 0 0
12 240 0 0
13 0 28 0
14 0 0 155
15 2 4 0
'PBMC1 - contingency_matrix (rows: cellTypist - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1 0 0 246 0 267 0 263 0 200 0 1 0 2 0 0 0 0 0
2 88 321 0 281 1 0 0 241 0 1 0 1 0 1 0 8 0 0
3 45 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0
4 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 76 0
5 250 5 0 0 0 0 0 2 0 0 0 52 0 0 0 0 0 0
6 0 0 0 0 0 0 0 0 0 0 4 0 138 0 0 0 0 0
7 8 0 0 0 0 0 0 0 0 19 0 37 0 18 0 0 0 0
8 0 0 0 0 0 263 0 0 0 16 0 0 0 0 0 0 0 0
9 2 2 0 5 0 0 0 5 0 0 0 0 0 1 0 66 0 0
10 0 0 75 0 0 0 5 0 0 0 0 0 0 0 91 0 0 0
11 0 0 0 0 0 5 0 0 0 65 0 0 0 0 0 0 0 0
12 6 0 0 0 0 0 0 4 0 82 0 59 0 86 0 3 0 0
13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 28
14 0 0 0 0 0 0 0 0 0 0 154 0 1 0 0 0 0 0
15 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: cellTypist - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10 11 12 13
1 659 0 0 0 289 0 1 0 0 30 0 0 0
2 0 485 48 402 0 2 0 0 0 0 0 6 0
3 0 1 41 5 0 0 0 0 0 0 0 0 0
4 0 0 0 0 6 0 0 0 0 72 0 0 0
5 0 5 288 12 0 4 0 0 0 0 0 0 0
6 0 0 0 0 0 0 1 2 139 0 0 0 0
7 0 0 58 0 0 23 1 0 0 0 0 0 0
8 0 0 0 0 0 0 279 0 0 0 0 0 0
9 0 4 0 2 0 2 0 0 0 0 0 73 0
10 1 0 0 0 78 0 0 0 0 0 92 0 0
11 0 0 0 0 0 67 3 0 0 0 0 0 0
12 0 1 48 1 0 189 0 0 0 0 0 1 0
13 0 0 0 0 0 0 0 0 0 0 0 0 28
14 0 0 0 0 0 1 0 147 7 0 0 0 0
15 0 3 0 0 0 3 0 0 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: cellTypist - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11
1 0 0 616 361 1 0 0 0 1 0 0
2 798 145 0 0 0 0 0 0 0 0 0
3 1 46 0 0 0 0 0 0 0 0 0
4 0 0 0 4 0 0 0 0 0 74 0
5 0 309 0 0 0 0 0 0 0 0 0
6 0 0 0 0 0 0 0 142 0 0 0
7 0 55 0 0 0 27 0 0 0 0 0
8 0 0 0 0 274 5 0 0 0 0 0
9 78 3 0 0 0 0 0 0 0 0 0
10 0 0 0 43 0 0 0 0 128 0 0
11 0 0 0 0 5 65 0 0 0 0 0
12 7 69 0 0 0 164 0 0 0 0 0
13 0 0 0 0 0 0 0 0 0 1 27
14 0 0 0 0 0 0 153 2 0 0 0
15 5 1 0 0 0 0 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: cellTypist - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14
1 35 0 11 648 284 0 0 0 0 0 1 0 0 0
2 0 0 0 0 0 0 0 0 7 0 0 0 807 129
3 0 0 0 0 0 0 0 0 0 0 0 37 2 8
4 0 73 2 3 0 0 0 0 0 0 0 0 0 0
5 0 0 0 0 0 0 0 0 0 0 0 1 0 308
6 0 0 0 0 0 0 0 142 0 0 0 0 0 0
7 0 0 0 0 0 0 0 0 0 25 1 0 0 56
8 0 0 0 0 0 0 0 0 55 4 220 0 0 0
9 0 0 0 0 0 0 0 0 34 1 0 0 45 1
10 0 1 146 23 0 0 0 0 0 0 0 0 1 0
11 0 0 0 0 0 0 0 0 2 67 1 0 0 0
12 0 0 0 0 0 0 0 0 3 175 0 1 5 56
13 0 1 0 0 0 27 0 0 0 0 0 0 0 0
14 0 0 0 0 0 0 152 3 0 0 0 0 0 0
15 0 0 0 0 2 0 0 0 4 0 0 0 0 0
'Initial antibody cell/cluster table:'
cluster.ids
7     1338
8      876
3      748
4      341
9      331
5      211
1      202
6      131
2       62
10      51
12      16
Name: count, dtype: int64
'PBMC1 - contingency_matrix (rows: antibody - cols: monocle)'
1 2 3
1 161 0 0
2 0 43 3
3 600 0 0
4 262 1 0
5 158 0 0
6 1 86 0
7 10 1115 0
8 812 1 1
9 1 0 294
10 44 0 0
12 10 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1 7 6 0 4 0 0 0 9 0 23 0 7 0 32 0 73 0 0
2 0 0 3 0 0 0 0 0 0 0 2 0 3 0 38 0 0 0
3 366 33 0 5 0 0 0 28 0 43 0 122 0 3 0 0 0 0
4 0 0 0 0 1 249 0 0 0 13 0 0 0 0 0 0 0 0
5 9 0 0 0 0 0 0 2 0 64 0 15 0 68 0 0 0 0
6 1 3 0 0 0 0 0 0 0 0 0 0 0 0 1 0 56 26
7 1 0 319 0 267 0 267 1 198 0 0 0 0 0 52 0 19 1
8 18 288 0 277 0 1 0 214 0 3 1 5 0 2 0 4 0 1
9 0 0 0 0 0 1 0 0 0 1 156 0 137 0 0 0 0 0
10 0 0 0 0 0 7 0 0 0 36 0 0 0 1 0 0 0 0
12 0 0 0 0 0 10 0 0 0 0 0 0 0 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10 11 12 13
1 0 6 12 5 0 62 0 0 0 0 0 76 0
2 2 0 0 0 3 0 0 1 1 0 39 0 0
3 0 49 441 66 0 44 0 0 0 0 0 0 0
4 1 0 0 0 0 0 262 0 0 0 0 0 0
5 0 2 12 1 0 141 0 0 0 0 0 2 0
6 0 1 0 0 0 3 0 0 0 56 1 0 26
7 655 3 0 0 368 0 1 0 0 45 52 0 1
8 0 438 17 349 0 4 3 0 0 0 0 2 1
9 0 0 0 0 0 0 3 148 144 0 0 0 0
10 0 0 1 0 0 37 6 0 0 0 0 0 0
12 0 0 0 0 0 0 10 0 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11
1 93 17 0 0 0 51 0 0 0 0 0
2 0 0 1 4 0 0 1 1 39 0 0
3 23 540 0 0 0 37 0 0 0 0 0
4 0 0 1 0 260 2 0 0 0 0 0
5 1 28 0 0 0 129 0 0 0 0 0
6 3 1 0 1 0 0 0 0 1 56 25
7 2 1 611 402 1 0 0 0 89 18 1
8 766 41 0 0 1 4 1 0 0 0 1
9 0 0 0 0 2 0 151 142 0 0 0
10 0 0 0 0 8 36 0 0 0 0 0
12 0 0 0 0 8 2 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14
1 0 0 0 0 0 0 0 0 36 54 0 1 59 11
2 1 1 40 2 0 0 1 1 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0 39 0 38 33 490
4 1 0 0 0 0 0 0 0 48 1 213 0 0 0
5 0 0 0 0 0 0 0 0 1 135 0 0 0 22
6 0 56 1 1 1 25 0 0 3 0 0 0 0 0
7 33 17 118 668 284 1 0 0 0 0 1 0 2 1
8 0 0 0 0 0 1 1 0 9 4 0 0 765 34
9 0 0 0 0 0 0 150 143 0 0 2 0 0 0
10 0 0 0 0 0 0 0 0 4 38 2 0 0 0
12 0 0 0 0 0 0 0 0 4 1 5 0 0 0
print_clustering_data(tuning = 'default',dataset="PBMC2")
'Initial COTAN cluster number:'
17
'Initial monocle cluster number:'
17
'Initial scanpy cluster number:'
2
'Initial scvi-tools cluster number:'
18
'Initial seurat cluster number:'
20
'PBMC2 - contingency_matrix (rows: cellTypist - cols: monocle)'
1 2
1 230 1
2 427 0
3 2139 3
4 700 7
5 316 0
6 0 93
7 0 567
8 674 0
9 0 186
10 52 0
11 0 228
12 0 204
13 0 48
14 0 14
15 80 0
16 0 6
'PBMC2 - contingency_matrix (rows: cellTypist - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1 0 0 0 2 0 0 1 22 1 2 5 50 0 148 0 0 0 0
2 0 91 0 273 0 0 0 3 2 0 56 1 0 1 0 0 0 0
3 942 508 21 183 0 0 0 21 295 2 100 42 0 26 1 0 0 1
4 0 0 0 1 0 463 8 2 0 230 0 0 0 2 1 0 0 0
5 0 0 0 2 0 0 0 266 0 0 42 0 0 6 0 0 0 0
6 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0 88 0 0
7 0 0 0 0 466 0 0 0 0 0 0 0 15 0 86 0 0 0
8 2 1 558 0 0 0 0 3 0 0 0 110 0 0 0 0 0 0
9 0 0 0 0 12 0 0 0 0 0 0 0 174 0 0 0 0 0
10 0 0 0 0 0 0 0 0 0 50 2 0 0 0 0 0 0 0
11 0 0 0 0 0 0 228 0 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 0 204 0 0 0 0 0 0 0 0 0 0 0
13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 48 0
14 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 13
15 0 25 0 46 0 0 0 1 2 1 2 2 0 1 0 0 0 0
16 0 0 0 1 0 0 0 0 5 0 0 0 0 0 0 0 0 0
'PBMC2 - contingency_matrix (rows: cellTypist - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
1 0 0 0 0 0 0 222 1 2 0 0 4 1 1 0 0 0 0 0 0
2 331 4 1 0 0 0 7 10 53 0 0 1 0 10 4 0 6 0 0 0
3 391 733 2 1 19 1 41 304 158 185 0 2 43 90 75 0 71 0 0 26
4 1 0 675 1 0 8 3 0 0 0 0 0 0 0 0 0 0 19 0 0
5 7 0 1 0 0 0 11 9 66 0 0 151 71 0 0 0 0 0 0 0
6 0 0 0 12 0 0 0 0 0 0 0 0 0 0 0 78 0 0 3 0
7 0 0 0 564 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0
8 0 2 0 1 561 0 103 1 0 2 0 1 3 0 0 0 0 0 0 0
9 0 0 0 11 0 0 0 0 0 0 175 0 0 0 0 0 0 0 0 0
10 2 0 4 0 0 0 0 0 0 0 0 0 2 0 0 0 0 44 0 0
11 0 0 0 0 0 228 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 204 0 0 0 0 0 0 0 0 0 0 0 0 0 0
13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 48 0
14 0 0 0 0 0 0 0 0 0 14 0 0 0 0 0 0 0 0 0 0
15 58 0 1 0 0 0 5 5 1 5 0 0 1 3 0 0 1 0 0 0
16 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0
'PBMC2 - contingency_matrix (rows: cellTypist - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14
1 0 4 6 0 0 0 219 0 1 0 0 1 0 0
2 0 400 23 0 0 0 1 0 3 0 0 0 0 0
3 1151 352 550 0 2 15 69 1 0 0 0 2 0 0
4 0 0 2 635 0 0 3 7 0 0 1 59 0 0
5 0 42 0 0 0 0 7 0 267 0 0 0 0 0
6 0 0 0 0 11 0 0 0 0 0 0 0 82 0
7 0 0 0 0 567 0 0 0 0 0 0 0 0 0
8 8 4 2 0 0 541 119 0 0 0 0 0 0 0
9 0 0 0 0 14 0 0 0 0 172 0 0 0 0
10 0 0 0 3 0 0 0 0 0 0 0 49 0 0
11 0 0 0 0 0 0 0 83 0 0 145 0 0 0
12 0 0 0 0 0 0 0 201 1 0 2 0 0 0
13 0 0 0 0 0 0 0 0 0 0 0 0 0 48
14 0 0 0 0 0 0 0 0 0 0 0 14 0 0
15 0 0 77 0 0 0 2 0 0 0 0 1 0 0
16 0 0 0 0 0 0 0 0 0 0 0 6 0 0
'PBMC2 - contingency_matrix (rows: cellTypist - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
1 0 0 0 3 4 220 3 0 0 0 1 0 0 0 0 0 0
2 0 0 0 416 9 0 2 0 0 0 0 0 0 0 0 0 0
3 0 2 1186 847 31 72 3 0 0 0 0 0 0 0 0 0 1
4 639 56 0 0 2 3 0 0 0 0 0 0 0 0 1 5 1
5 0 0 0 39 0 7 270 0 0 0 0 0 0 0 0 0 0
6 0 0 0 0 0 0 0 0 0 86 0 7 0 0 0 0 0
7 1 0 0 0 0 0 0 0 0 0 88 300 174 0 0 3 1
8 0 0 568 9 1 96 0 0 0 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 0 154 24 0 0 8 0 0 0 0 0
10 1 51 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
11 0 0 0 0 0 0 0 0 0 0 0 0 0 0 145 69 14
12 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 22 180
13 0 0 0 0 0 0 0 0 0 0 0 0 0 48 0 0 0
14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 13 1
15 0 1 0 6 72 1 0 0 0 0 0 0 0 0 0 0 0
16 0 0 0 1 0 1 0 0 0 0 1 0 3 0 0 0 0
'Initial antibody cell/cluster table:'
cluster.ids
4     1510
11    1130
8      695
12     570
6      424
13     275
5      197
2      150
10     122
3       84
7       76
Name: count, dtype: int64
'PBMC2 - contingency_matrix (rows: antibody - cols: monocle)'
1 2
2 0 147
3 60 20
4 1480 13
5 196 0
6 416 1
7 68 7
8 681 5
10 0 119
11 1115 9
12 566 4
13 0 271
'PBMC2 - contingency_matrix (rows: antibody - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
2 0 0 0 0 4 0 0 0 0 0 0 0 141 0 2 0 0 0
3 0 1 1 0 0 3 0 0 0 54 0 0 18 2 1 0 0 0
4 89 588 0 478 0 0 0 56 63 0 194 10 1 3 3 4 3 1
5 7 1 14 2 0 0 0 118 2 2 4 23 0 23 0 0 0 0
6 0 2 9 13 0 0 1 128 1 0 3 124 0 136 0 0 0 0
7 1 3 27 3 0 0 0 6 1 0 1 23 0 3 0 5 2 0
8 0 0 0 0 0 459 4 0 0 220 0 0 2 0 0 0 1 0
10 0 0 0 0 107 0 0 0 0 0 0 0 3 0 3 6 0 0
11 843 26 5 5 0 0 5 4 229 0 1 2 0 0 1 2 1 0
12 2 0 522 0 0 0 2 1 1 2 0 21 0 17 1 0 1 0
13 0 0 0 0 0 0 263 0 0 0 0 0 7 0 0 0 1 0
'PBMC2 - contingency_matrix (rows: antibody - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
2 0 0 0 5 0 0 0 0 0 0 142 0 0 0 0 0 0 0 0 0
3 0 0 55 0 1 0 0 1 0 0 19 0 0 0 0 0 0 4 0 0
4 758 143 0 4 0 0 30 125 259 52 0 5 4 54 26 4 23 0 3 3
5 4 8 1 0 5 0 27 4 6 0 0 36 105 0 0 0 0 0 0 0
6 4 0 0 0 17 0 270 2 9 0 0 112 3 0 0 0 0 0 0 0
7 0 1 0 1 31 0 27 2 1 1 0 5 0 0 0 4 0 0 2 0
8 0 0 622 0 0 4 0 0 0 0 2 0 1 0 0 0 0 56 1 0
10 0 0 0 111 0 0 0 0 0 0 4 0 0 0 0 4 0 0 0 0
11 14 581 0 1 5 5 2 191 5 138 0 0 1 50 51 2 54 0 1 23
12 0 2 1 1 520 2 34 1 0 1 0 0 6 0 0 0 1 0 1 0
13 0 0 0 7 0 263 0 0 0 0 0 0 0 0 0 0 0 0 1 0
'PBMC2 - contingency_matrix (rows: antibody - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14
2 0 0 0 0 8 0 0 0 0 139 0 0 0 0
3 0 0 1 9 2 1 1 0 0 18 0 48 0 0
4 65 756 607 0 4 0 10 0 42 0 0 2 4 3
5 6 6 2 0 0 12 65 0 104 0 0 1 0 0
6 0 22 3 0 0 7 267 0 118 0 0 0 0 0
7 2 7 5 0 0 21 30 0 3 0 0 0 5 2
8 0 0 0 627 0 0 0 3 0 2 1 52 0 1
10 0 0 0 0 113 0 0 0 0 3 0 0 3 0
11 1073 4 35 0 1 1 1 4 1 0 1 0 2 1
12 9 0 0 0 1 512 44 1 0 0 1 1 0 1
13 0 0 0 0 7 0 0 159 1 0 103 0 0 1
'PBMC2 - contingency_matrix (rows: antibody - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
2 0 0 0 0 0 0 0 137 4 0 2 3 1 0 0 0 0
3 8 49 1 2 0 1 0 1 17 0 1 0 0 0 0 0 0
4 0 0 76 1235 110 16 44 0 0 4 3 0 1 3 0 1 0
5 0 0 19 6 1 65 105 0 0 0 0 0 0 0 0 0 0
6 0 0 11 23 1 261 120 0 0 0 1 0 0 0 0 0 0
7 0 0 32 16 1 15 4 0 0 5 0 0 0 2 0 0 0
8 630 50 0 0 0 0 0 0 2 0 0 0 0 1 1 2 0
10 0 0 0 0 0 0 0 4 0 5 1 76 33 0 0 0 0
11 0 0 1082 27 4 1 1 0 0 2 1 0 0 1 1 3 1
12 0 1 526 0 0 38 1 0 0 0 0 0 0 1 1 1 1
13 0 0 0 0 0 0 1 0 0 0 5 0 0 1 102 29 133
print_clustering_data(tuning = 'default',dataset="PBMC3")
'Initial COTAN cluster number:'
32
'Initial monocle cluster number:'
32
'Initial scanpy cluster number:'
3
'Initial scvi-tools cluster number:'
22
'Initial seurat cluster number:'
17
'PBMC3 - contingency_matrix (rows: cellTypist - cols: monocle)'
1 2 3
1 3021 0 0
2 1 1471 0
3 6 1 655
4 1100 0 0
5 1183 26 33
6 0 156 0
7 1112 1 0
8 484 0 0
9 0 0 396
10 0 408 0
11 430 0 0
12 0 0 8
13 233 0 0
14 111 0 0
15 4 16 0
16 0 11 0
17 0 8 0
18 0 57 0
19 0 12 0
'PBMC3 - contingency_matrix (rows: cellTypist - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 ... 13 14 15 16 17 18 19 20 21 22
1 1401 0 153 0 0 29 26 534 121 429 ... 1 0 100 0 0 0 0 0 28 0
2 0 0 0 0 816 0 0 0 0 0 ... 0 2 0 227 0 5 0 35 0 0
3 0 0 0 543 0 0 0 0 5 0 ... 0 0 0 0 111 0 0 2 0 0
4 26 0 0 0 0 806 12 5 29 6 ... 0 0 155 0 0 0 0 0 1 0
5 0 961 0 0 0 0 3 0 0 0 ... 216 0 0 29 33 0 0 0 0 0
6 0 0 0 0 8 0 0 0 0 0 ... 0 0 0 0 0 147 0 0 0 0
7 0 0 683 0 0 0 7 128 236 15 ... 0 0 6 1 0 0 0 0 0 0
8 0 0 0 0 0 0 423 0 32 0 ... 4 0 6 0 0 0 0 0 0 0
9 0 0 0 395 0 0 0 0 0 0 ... 0 0 0 0 1 0 0 0 0 0
10 0 0 0 0 90 0 0 0 0 0 ... 0 311 0 1 0 1 0 1 0 0
11 0 2 0 0 0 0 281 2 54 0 ... 4 0 42 0 0 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 8 0 0 0 0 0
13 12 0 116 0 0 0 1 52 9 11 ... 0 0 1 0 0 0 0 0 0 0
14 0 5 1 0 0 0 5 0 3 0 ... 97 0 0 0 0 0 0 0 0 0
15 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 20
16 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 11 0 0 0 0 0 0
17 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 8 0 0 0 0
18 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 57 0 0 0
19 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 12 0 0 0 0 0

19 rows × 22 columns

'PBMC3 - contingency_matrix (rows: cellTypist - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
1 740 1603 2 1 1 40 86 23 248 1 197 79 0 0 0 0 0
2 0 0 1460 0 1 0 0 0 0 3 0 0 8 0 0 0 0
3 0 0 4 2 655 0 1 0 0 0 0 0 0 0 0 0 0
4 6 11 0 1 1 912 162 2 1 0 4 0 0 0 0 0 0
5 0 0 29 1165 31 0 7 1 0 0 0 0 0 8 0 1 0
6 0 0 10 0 0 0 0 0 0 0 0 0 146 0 0 0 0
7 883 14 1 0 0 1 25 29 80 0 70 10 0 0 0 0 0
8 24 0 0 6 0 1 5 313 0 0 0 135 0 0 0 0 0
9 0 0 0 0 396 0 0 0 0 0 0 0 0 0 0 0 0
10 0 0 97 0 0 0 0 0 0 311 0 0 0 0 0 0 0
11 10 0 0 2 0 2 408 2 0 0 1 4 0 1 0 0 0
12 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0
13 174 26 0 0 0 0 3 0 25 0 5 0 0 0 0 0 0
14 0 0 1 19 0 0 1 0 0 0 0 0 0 90 0 0 0
15 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 20 0
16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11
17 0 0 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0
18 0 0 1 0 1 0 0 0 0 0 0 0 0 0 55 0 0
19 0 0 0 0 0 12 0 0 0 0 0 0 0 0 0 0 0
'PBMC3 - contingency_matrix (rows: cellTypist - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1 1740 415 0 0 681 20 162 0 3 0 0 0 0 0 0 0 0 0
2 0 0 1013 0 0 0 0 0 0 0 0 227 0 229 3 0 0 0
3 0 0 0 0 0 0 2 535 0 2 0 1 2 0 0 120 0 0
4 16 6 0 0 3 886 188 0 0 0 0 0 0 0 0 1 0 0
5 0 1 4 1043 0 0 2 1 1 0 0 24 136 0 0 30 0 0
6 0 0 7 0 0 0 0 0 0 0 0 0 0 0 149 0 0 0
7 0 980 0 0 108 0 18 0 7 0 0 0 0 0 0 0 0 0
8 0 18 0 3 0 0 8 0 454 0 0 0 1 0 0 0 0 0
9 0 0 0 0 0 0 0 57 0 336 0 0 0 0 0 3 0 0
10 0 0 84 0 0 0 0 0 0 0 319 1 0 4 0 0 0 0
11 0 14 0 0 0 2 411 0 3 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 0 0 7 0 0 0 0 0 0 0 1 0 0
13 13 5 0 0 208 0 7 0 0 0 0 0 0 0 0 0 0 0
14 0 0 0 2 0 0 0 0 0 0 0 0 109 0 0 0 0 0
15 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 20
16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11
17 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 0 0 0
18 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 52 0
19 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 12
'PBMC3 - contingency_matrix (rows: cellTypist - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 ... 23 24 25 26 27 28 29 30 31 32
1 1 48 6 205 1790 15 4 53 199 154 ... 0 0 0 0 0 0 0 0 2 0
2 0 0 0 0 0 0 0 0 0 0 ... 0 1 0 0 2 0 0 0 0 0
3 0 2 0 0 0 0 0 1 1 0 ... 67 125 220 131 0 0 2 0 0 0
4 0 152 1 89 18 805 0 2 7 20 ... 0 0 0 0 0 0 0 0 0 0
5 1 0 0 1 0 0 1 1 0 0 ... 3 0 3 0 30 4 138 1025 0 0
6 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
7 0 12 9 26 0 0 4 431 546 27 ... 0 0 0 0 0 0 1 0 0 0
8 0 4 465 0 0 0 0 1 8 0 ... 0 0 0 0 0 0 2 3 0 0
9 0 0 0 0 0 0 0 0 0 0 ... 0 0 30 15 0 0 0 0 0 0
10 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
11 83 272 2 9 0 1 0 0 7 8 ... 0 0 0 0 0 0 14 1 32 0
12 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 8 0
13 0 0 0 4 5 0 139 3 1 6 ... 0 0 0 0 0 0 10 0 0 0
14 0 0 0 1 0 0 0 0 1 0 ... 0 0 0 0 0 86 17 6 0 0
15 0 4 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 16 0
16 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 11 0
17 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
18 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 52
19 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 11 0

19 rows × 32 columns

'Initial antibody cell/cluster table:'
cluster.ids
9     2220
10    1635
7     1271
13    1067
5     1010
12     909
6      744
2      271
4      214
14     168
3      149
23     133
22      71
Name: count, dtype: int64
'PBMC3 - contingency_matrix (rows: antibody - cols: monocle)'
1 2 3
2 3 265 0
3 132 13 3
4 209 2 1
5 993 14 1
6 741 0 0
7 4 1226 0
9 2201 9 1
10 1616 12 3
12 905 2 1
13 2 43 1002
14 141 23 1
22 70 1 0
23 130 0 0
'PBMC3 - contingency_matrix (rows: antibody - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 ... 13 14 15 16 17 18 19 20 21 22
2 0 1 0 0 17 0 0 0 0 0 ... 1 223 0 25 0 0 1 0 0 0
3 0 14 2 0 0 1 5 0 3 0 ... 106 12 0 0 4 0 0 0 0 0
4 4 0 5 0 0 50 25 10 28 15 ... 1 0 57 0 1 2 0 0 1 0
5 0 912 0 0 0 0 2 0 0 0 ... 80 10 0 2 1 0 0 0 0 1
6 1 0 8 0 0 9 433 11 75 1 ... 3 0 151 1 0 0 0 0 0 0
7 0 0 1 0 735 0 0 1 0 0 ... 0 20 0 167 0 28 2 0 0 1
9 92 0 808 1 0 1 26 593 323 196 ... 0 3 19 1 0 5 0 0 7 0
10 1252 2 4 0 0 15 3 18 4 205 ... 3 1 5 3 6 4 2 0 20 2
12 25 1 0 0 1 737 34 0 8 2 ... 6 0 40 1 1 0 0 0 0 0
13 0 0 0 873 1 0 1 0 1 0 ... 0 6 0 1 128 2 2 30 0 0
14 0 17 0 0 0 0 4 0 2 1 ... 116 20 0 1 1 0 3 0 0 0
22 10 0 9 0 0 0 2 18 6 19 ... 0 0 1 0 0 0 1 0 1 0
23 0 0 90 0 0 0 0 18 1 2 ... 0 0 0 0 0 0 0 0 0 0

13 rows × 22 columns

'PBMC3 - contingency_matrix (rows: antibody - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
2 0 0 42 2 0 0 0 0 0 223 0 0 0 0 1 0 0
3 3 0 1 48 2 1 6 1 0 12 0 0 0 74 0 0 0
4 18 8 1 0 2 76 80 18 0 0 7 0 2 0 0 0 0
5 0 0 1 979 1 0 0 0 0 10 0 0 0 14 0 2 1
6 14 1 1 1 0 23 464 222 0 0 6 6 0 3 0 0 0
7 2 0 1182 0 0 0 0 0 0 18 0 0 26 0 1 1 0
9 1524 275 1 0 2 1 26 47 171 3 134 22 5 0 0 0 0
10 52 1281 1 6 4 15 4 2 149 2 101 4 6 0 2 2 0
12 0 5 2 4 1 816 67 5 2 0 1 5 0 0 0 0 0
13 0 0 31 1 995 0 1 1 8 7 0 0 2 0 1 0 0
14 0 0 1 129 1 0 0 3 0 20 0 1 0 7 3 0 0
22 39 14 0 0 0 0 0 4 4 0 6 3 0 0 1 0 0
23 111 2 0 0 0 0 0 0 13 0 4 0 0 0 0 0 0
'PBMC3 - contingency_matrix (rows: antibody - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
2 0 0 18 1 0 0 0 0 1 0 226 20 1 0 0 0 1 0
3 0 3 0 25 0 1 1 0 2 0 12 0 101 0 0 2 0 1
4 11 28 0 0 12 48 97 0 12 0 0 0 1 0 2 1 0 0
5 0 0 0 980 0 0 0 0 0 0 10 1 14 0 0 1 0 2
6 1 22 0 0 6 16 468 0 225 0 0 1 2 0 0 0 0 0
7 0 2 873 0 0 0 0 0 0 0 22 139 0 164 27 0 2 1
9 129 1302 0 0 716 0 31 1 23 0 3 1 0 0 5 0 0 0
10 1543 8 1 2 46 3 6 0 2 0 0 3 3 0 4 4 2 4
12 6 0 1 0 3 816 72 0 4 0 0 1 4 0 0 1 0 0
13 0 0 1 1 0 0 0 549 1 327 7 30 0 1 2 126 2 0
14 0 1 0 14 1 0 0 0 3 0 20 1 121 0 0 1 3 0
22 12 19 0 0 34 0 3 0 2 0 0 0 0 0 0 0 1 0
23 0 4 0 0 126 0 0 0 0 0 0 0 0 0 0 0 0 0
'PBMC3 - contingency_matrix (rows: antibody - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 ... 23 24 25 26 27 28 29 30 31 32
2 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 14 0 1 2 0 1
3 1 0 1 3 0 1 0 0 1 0 ... 1 0 0 0 0 72 28 25 1 0
4 3 29 15 13 15 50 2 9 15 44 ... 0 0 0 0 0 0 2 0 1 0
5 0 0 0 1 0 0 0 0 0 0 ... 0 0 1 0 12 11 2 968 2 0
6 62 357 227 8 1 13 1 14 11 4 ... 0 0 0 0 0 1 15 0 18 0
7 0 0 0 1 0 0 0 1 0 0 ... 0 0 0 0 1 0 0 0 1 2
9 1 21 28 159 158 0 30 440 716 134 ... 0 0 1 0 0 0 8 0 2 0
10 0 5 3 3 1550 3 0 6 2 5 ... 0 0 0 0 0 0 4 2 2 2
12 6 47 4 79 5 739 0 0 1 3 ... 0 0 0 0 0 0 5 0 6 0
13 0 0 1 0 0 0 0 0 1 0 ... 65 116 233 128 1 1 0 0 8 2
14 0 0 3 0 0 0 1 1 1 0 ... 0 0 0 0 0 4 116 14 0 3
22 0 0 2 2 19 0 1 8 6 9 ... 0 0 0 0 0 0 0 0 0 1
23 0 0 0 4 0 0 109 4 0 1 ... 0 0 0 0 0 0 0 0 0 0

13 rows × 32 columns

print_clustering_data(tuning = 'default',dataset="PBMC4")
'Initial COTAN cluster number:'
24
'Initial monocle cluster number:'
24
'Initial scanpy cluster number:'
3
'Initial scvi-tools cluster number:'
22
'Initial seurat cluster number:'
16
'PBMC4 - contingency_matrix (rows: cellTypist - cols: monocle)'
1 2 3
1 407 0 0
2 11 0 797
3 1330 1 0
4 108 0 0
5 9 2178 13
6 308 0 0
7 77 0 0
8 538 0 0
9 358 1 0
10 0 307 0
11 1 1 222
12 0 28 0
13 8 3 2
14 106 0 0
15 0 92 1
16 0 59 0
'PBMC4 - contingency_matrix (rows: cellTypist - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 ... 13 14 15 16 17 18 19 20 21 22
1 10 0 0 0 0 0 0 384 1 0 ... 0 6 0 0 0 1 5 0 0 0
2 0 673 0 0 0 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 34
3 300 0 0 0 496 385 0 1 0 0 ... 136 10 3 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 6 10 0 ... 0 7 0 0 0 0 85 0 0 0
5 1 0 596 456 0 0 427 0 0 281 ... 0 0 0 169 76 145 0 48 0 0
6 7 0 0 0 4 3 0 0 0 0 ... 8 99 187 0 0 0 0 0 0 0
7 3 0 0 0 0 0 0 1 0 0 ... 0 67 0 0 0 0 6 0 0 0
8 462 0 0 0 8 46 0 1 0 0 ... 19 2 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 0 5 348 0 ... 1 0 0 0 0 0 5 0 0 0
10 0 0 0 74 0 0 1 0 0 2 ... 0 0 0 10 2 1 0 1 0 0
11 0 46 0 0 0 0 0 0 0 0 ... 0 0 0 0 1 0 0 0 0 5
12 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 28 0 0
13 7 0 0 0 0 0 0 0 0 0 ... 6 0 0 0 0 0 0 0 0 0
14 42 0 0 0 1 13 0 1 0 0 ... 47 2 0 0 0 0 0 0 0 0
15 0 0 0 0 0 0 0 0 0 1 ... 0 0 0 2 88 2 0 0 0 0
16 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 59 0

16 rows × 22 columns

'PBMC4 - contingency_matrix (rows: cellTypist - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
1 0 0 4 1 0 0 1 0 315 0 0 0 86 0 0 0
2 800 0 2 1 0 0 2 0 1 0 1 0 1 0 0 0
3 0 939 357 2 0 0 20 0 1 0 0 0 0 0 0 12
4 0 0 0 0 0 0 105 2 0 0 0 0 1 0 0 0
5 4 0 1 805 746 413 0 4 2 1 167 15 1 41 0 0
6 0 8 8 0 0 0 291 0 1 0 0 0 0 0 0 0
7 0 0 3 0 0 0 67 1 6 0 0 0 0 0 0 0
8 0 31 483 0 0 0 3 0 6 0 0 0 0 0 0 15
9 0 0 1 2 0 0 0 355 0 0 0 0 1 0 0 0
10 0 0 0 14 0 94 1 0 0 194 4 0 0 0 0 0
11 217 0 0 0 0 0 0 0 0 0 7 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 0 0 0 0 28 0 0
13 0 1 1 0 0 0 0 6 0 0 0 0 0 0 5 0
14 1 9 89 0 0 0 4 0 1 0 0 0 0 0 0 2
15 0 0 0 2 0 3 0 0 0 0 1 87 0 0 0 0
16 0 0 0 0 0 0 0 0 0 0 0 1 0 0 58 0
'PBMC4 - contingency_matrix (rows: cellTypist - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
1 5 0 0 0 0 0 398 0 0 4 0 0 0 0 0 0 0 0 0
2 0 0 0 0 632 0 1 0 122 1 0 0 0 0 0 0 0 52 0
3 356 854 0 0 0 0 0 0 0 2 116 0 3 0 0 0 0 0 0
4 0 0 0 0 0 0 0 1 0 107 0 0 0 0 0 0 0 0 0
5 2 0 780 723 0 447 2 0 2 0 1 1 0 136 28 35 0 0 43
6 31 15 0 0 0 0 0 0 0 68 5 0 189 0 0 0 0 0 0
7 3 0 0 0 0 0 3 0 0 69 2 0 0 0 0 0 0 0 0
8 525 0 0 0 0 0 1 0 0 2 10 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 3 354 1 0 1 0 0 0 0 0 0 0 0
10 0 0 0 7 0 86 0 0 0 0 0 213 0 1 0 0 0 0 0
11 0 0 0 0 1 0 0 0 223 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 28 0 0 0
13 10 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 0
14 4 0 0 0 0 0 0 0 0 1 101 0 0 0 0 0 0 0 0
15 0 0 0 1 0 1 0 0 0 0 0 0 0 0 90 0 0 0 1
16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 59 0 0
'PBMC4 - contingency_matrix (rows: cellTypist - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 ... 15 16 17 18 19 20 21 22 23 24
1 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 1 0 402 0 0 4
2 0 0 0 0 0 0 0 0 2 45 ... 0 0 0 0 1 0 1 0 1 0
3 0 0 0 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 45 963 322
4 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 105 0 0 0 0 3
5 40 131 724 199 332 741 1 27 0 0 ... 0 0 0 0 0 0 2 0 0 1
6 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 4 209 95
7 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 3 0 4 2 0 68
8 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 7 530
9 0 0 0 0 0 0 0 0 0 0 ... 0 0 55 36 118 147 1 1 0 1
10 0 1 6 0 83 2 215 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
11 0 0 0 0 0 0 0 0 24 0 ... 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 0 ... 0 28 0 0 0 0 0 0 0 0
13 0 0 0 0 0 0 0 1 0 1 ... 0 0 8 0 0 0 0 0 1 1
14 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 93 1 12
15 1 0 1 0 1 0 0 90 0 0 ... 0 0 0 0 0 0 0 0 0 0
16 0 0 0 0 0 0 0 0 0 0 ... 59 0 0 0 0 0 0 0 0 0

16 rows × 24 columns

'Initial antibody cell/cluster table:'
cluster.ids
3     2280
1     1367
10    1018
9      488
2      351
14     348
4      242
5      224
24     194
26      64
22      43
12      41
Name: count, dtype: int64
'PBMC4 - contingency_matrix (rows: antibody - cols: monocle)'
1 2 3
1 1342 5 0
2 335 5 0
3 8 2158 0
4 241 0 0
5 16 195 1
9 480 1 0
10 13 39 931
12 38 0 0
14 343 2 0
22 42 0 0
24 193 0 0
26 63 0 0
'PBMC4 - contingency_matrix (rows: antibody - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 ... 13 14 15 16 17 18 19 20 21 22
1 744 0 0 0 195 247 0 8 1 0 ... 139 6 0 0 3 1 2 0 1 0
2 10 0 0 0 0 0 0 5 317 0 ... 2 0 0 0 3 0 3 0 0 0
3 1 0 573 514 1 0 405 0 0 273 ... 2 0 0 178 26 119 0 42 3 0
4 3 0 0 0 3 2 0 1 2 0 ... 8 30 174 0 0 0 18 0 0 0
5 0 0 1 0 0 0 1 0 14 0 ... 1 0 0 0 3 11 2 1 0 0
9 12 0 0 0 1 3 0 288 0 0 ... 2 130 8 0 1 0 36 0 0 0
10 1 657 1 0 0 0 1 0 6 1 ... 0 0 0 0 40 0 0 3 0 37
12 0 0 0 0 0 0 0 2 3 0 ... 0 2 0 0 0 0 31 0 0 0
14 10 0 0 0 205 105 0 0 0 0 ... 23 0 0 0 1 0 0 0 1 0
22 6 0 0 0 2 2 0 2 0 0 ... 0 24 6 0 0 0 0 0 0 0
24 9 0 0 0 97 75 0 0 1 0 ... 9 0 1 0 0 0 1 0 0 0
26 27 0 0 0 1 12 0 1 0 0 ... 22 0 0 0 0 0 0 0 0 0

12 rows × 22 columns

'PBMC4 - contingency_matrix (rows: antibody - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
1 0 454 839 0 1 1 8 1 12 0 0 2 1 0 1 27
2 0 1 4 1 0 0 0 328 0 0 0 3 1 0 2 0
3 0 2 0 719 730 476 1 0 2 15 168 14 1 35 3 0
4 0 4 2 0 0 0 230 3 2 0 0 0 0 0 0 0
5 0 0 0 10 1 7 0 19 0 170 0 3 0 1 1 0
9 0 2 8 0 0 0 175 0 288 0 0 1 7 0 0 0
10 921 0 3 36 4 1 0 5 0 2 8 0 1 2 0 0
12 0 0 0 0 0 0 38 0 0 0 0 0 0 0 0 0
14 0 324 17 0 0 0 0 0 0 0 0 1 0 0 1 2
22 0 3 4 0 0 0 27 0 8 0 0 0 0 0 0 0
24 0 185 4 0 0 0 2 1 0 0 0 0 1 0 0 0
26 1 4 58 0 0 0 0 0 0 0 0 0 0 0 0 0
'PBMC4 - contingency_matrix (rows: antibody - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
1 853 323 0 0 0 0 9 1 0 5 151 0 0 1 3 0 1 0 0
2 9 0 0 0 0 0 2 324 0 0 0 0 0 0 4 0 0 1 0
3 4 1 750 693 0 519 1 0 0 0 1 25 0 113 26 30 3 0 0
4 1 11 0 0 0 0 1 2 0 46 2 0 178 0 0 0 0 0 0
5 1 0 1 2 0 2 1 15 0 0 0 177 0 9 3 1 0 0 0
9 28 5 0 0 0 0 292 0 0 140 9 0 6 0 1 0 0 0 0
10 1 0 3 3 598 0 0 4 304 0 0 1 0 0 0 2 0 30 37
12 0 0 0 0 0 0 0 1 0 36 1 0 0 0 0 0 0 0 0
14 10 326 0 0 0 0 0 0 0 0 6 0 1 0 1 0 1 0 0
22 12 11 0 0 0 0 4 0 0 9 0 0 6 0 0 0 0 0 0
24 3 186 0 0 0 0 0 1 0 1 1 0 1 0 0 0 0 0 0
26 3 0 0 0 0 0 0 0 0 0 60 0 0 0 0 0 0 0 0
'PBMC4 - contingency_matrix (rows: antibody - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 ... 15 16 17 18 19 20 21 22 23 24
1 0 1 0 0 0 0 0 3 0 0 ... 1 0 0 0 3 0 9 75 431 824
2 0 0 0 0 0 0 0 4 0 1 ... 0 0 57 29 102 142 1 0 0 4
3 0 110 687 195 395 719 26 25 0 0 ... 3 1 0 0 0 0 1 0 2 2
4 0 0 0 0 0 0 0 0 0 0 ... 0 0 1 0 19 0 1 1 191 28
5 0 9 2 0 1 2 178 3 0 0 ... 0 0 0 4 10 1 0 0 0 1
9 0 0 0 0 0 0 0 1 0 0 ... 0 0 0 0 41 0 292 6 12 129
10 33 1 4 1 2 2 1 0 24 25 ... 0 0 1 1 1 2 0 0 1 1
12 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 36 0 0 1 0 1
14 0 0 0 0 0 0 0 1 0 0 ... 1 0 0 0 0 0 0 1 333 9
22 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 6 0 17 19
24 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 1 0 1 0 187 4
26 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 57 1 5

12 rows × 24 columns

Against cellTypist cluster number

print_clustering_data(tuning = 'celltypist',dataset="PBMC1")
'Initial COTAN cluster number:'
14
'Initial monocle cluster number:'
14
'Initial scanpy cluster number:'
18
'Initial scvi-tools cluster number:'
17
'Initial seurat cluster number:'
20
'PBMC1 - contingency_matrix (rows: cellTypist - cols: monocle)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1 0 1 3 1 273 0 0 0 237 227 147 0 6 6 0 77 1 0
2 66 0 0 0 0 230 228 218 0 0 0 145 25 0 31 0 0 0
3 3 0 0 0 0 0 2 0 0 0 0 0 36 0 6 0 0 0
4 0 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 73 0
5 200 0 0 7 0 1 0 0 0 0 0 1 65 0 35 0 0 0
6 0 142 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
7 36 0 0 40 0 0 0 0 0 0 0 0 6 0 0 0 0 0
8 0 1 267 10 0 0 0 0 0 0 0 0 1 0 0 0 0 0
9 2 0 0 0 0 22 15 20 0 0 0 21 0 0 1 0 0 0
10 0 0 0 0 1 0 0 0 0 2 37 0 0 131 0 0 0 0
11 0 0 21 49 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12 27 0 0 179 0 3 2 1 0 0 0 4 14 0 10 0 0 0
13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 28
14 0 155 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
15 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 2 3 0
'PBMC1 - contingency_matrix (rows: cellTypist - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
1 0 0 369 243 292 0 0 0 0 1 0 0 0 0 0 74 0
2 474 89 0 0 0 258 0 2 0 0 0 111 0 8 0 1 0
3 0 45 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0
4 0 0 0 2 0 0 0 0 0 0 0 0 0 0 76 0 0
5 2 260 0 0 0 2 0 42 0 0 0 3 0 0 0 0 0
6 0 0 0 0 0 0 0 0 0 4 138 0 0 0 0 0 0
7 0 8 0 0 0 0 0 55 19 0 0 0 0 0 0 0 0
8 0 0 0 0 0 0 263 0 16 0 0 0 0 0 0 0 0
9 5 2 0 0 0 5 0 1 0 0 0 2 0 66 0 0 0
10 0 0 0 78 2 0 0 0 0 0 0 0 91 0 0 0 0
11 0 0 0 0 0 0 5 0 65 0 0 0 0 0 0 0 0
12 0 11 0 0 0 6 0 143 78 0 0 0 0 2 0 0 0
13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 28
14 0 0 0 0 0 0 0 0 0 154 1 0 0 0 0 0 0
15 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: cellTypist - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
1 1 353 261 1 2 0 1 183 0 4 0 152 0 1 19 1 0 0 0 0
2 43 0 0 278 0 227 187 0 180 0 0 0 0 0 0 0 9 16 0 3
3 4 0 0 9 1 0 0 0 2 0 0 0 0 0 0 0 0 31 0 0
4 0 0 5 0 0 0 0 0 0 0 0 0 0 0 73 0 0 0 0 0
5 279 0 0 11 4 9 2 0 3 0 0 0 0 0 0 0 0 1 0 0
6 0 0 0 0 0 0 0 0 0 0 142 0 0 0 0 0 0 0 0 0
7 55 0 0 0 26 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
8 0 0 0 0 1 0 0 0 0 152 1 0 0 125 0 0 0 0 0 0
9 1 0 0 1 1 3 1 0 0 0 0 0 0 0 0 0 72 0 0 2
10 0 0 86 0 0 0 0 4 0 0 0 0 0 0 0 81 0 0 0 0
11 0 0 0 0 65 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0
12 45 0 0 1 173 1 0 0 2 0 0 0 0 0 0 0 0 1 0 17
13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 28 0
14 0 0 0 0 0 0 0 0 0 0 9 0 145 0 0 0 0 0 0 1
15 0 0 0 0 3 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: cellTypist - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 ... 12 13 14 15 16 17 18 19 20 21
1 0 0 351 267 0 229 0 0 0 0 ... 0 3 0 1 88 0 0 40 0 0
2 423 119 0 0 238 0 154 0 0 0 ... 0 0 0 0 0 0 7 0 1 0
3 1 15 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 31 0
4 0 0 0 4 0 0 0 0 0 0 ... 0 0 0 0 0 74 0 0 0 0
5 0 258 0 0 0 0 1 0 1 0 ... 0 0 0 0 0 0 0 0 2 0
6 0 0 0 0 0 0 0 0 0 0 ... 142 0 0 0 0 0 0 0 0 0
7 0 4 0 0 0 0 0 0 13 0 ... 0 0 12 1 0 0 0 0 0 0
8 0 0 0 0 0 0 0 151 0 0 ... 0 0 5 123 0 0 0 0 0 0
9 10 2 0 0 11 0 11 0 1 0 ... 0 0 0 0 0 0 46 0 0 0
10 0 0 0 38 0 0 0 0 0 0 ... 0 130 0 0 3 0 0 0 0 0
11 0 0 0 0 0 0 0 4 0 0 ... 0 0 66 0 0 0 0 0 0 0
12 1 3 0 0 4 0 0 0 138 0 ... 0 0 44 0 0 0 2 0 1 0
13 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 27
14 0 0 0 0 0 0 0 0 0 153 ... 2 0 0 0 0 0 0 0 0 0
15 0 1 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 5 0 0 0

15 rows × 21 columns

'PBMC1 - contingency_matrix (rows: cellTypist - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14
1 35 0 11 648 284 0 0 0 0 0 1 0 0 0
2 0 0 0 0 0 0 0 0 7 0 0 0 807 129
3 0 0 0 0 0 0 0 0 0 0 0 37 2 8
4 0 73 2 3 0 0 0 0 0 0 0 0 0 0
5 0 0 0 0 0 0 0 0 0 0 0 1 0 308
6 0 0 0 0 0 0 0 142 0 0 0 0 0 0
7 0 0 0 0 0 0 0 0 0 25 1 0 0 56
8 0 0 0 0 0 0 0 0 55 4 220 0 0 0
9 0 0 0 0 0 0 0 0 34 1 0 0 45 1
10 0 1 146 23 0 0 0 0 0 0 0 0 1 0
11 0 0 0 0 0 0 0 0 2 67 1 0 0 0
12 0 0 0 0 0 0 0 0 3 175 0 1 5 56
13 0 1 0 0 0 27 0 0 0 0 0 0 0 0
14 0 0 0 0 0 0 152 3 0 0 0 0 0 0
15 0 0 0 0 2 0 0 0 4 0 0 0 0 0
print_clustering_data(tuning = 'celltypist',dataset="PBMC2")
'Initial COTAN cluster number:'
17
'Initial monocle cluster number:'
17
'Initial scanpy cluster number:'
18
'Initial scvi-tools cluster number:'
20
'Initial seurat cluster number:'
19
'PBMC2 - contingency_matrix (rows: cellTypist - cols: monocle)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1 1 0 0 1 0 5 0 55 0 0 12 38 6 90 1 0 0 22
2 0 0 0 0 0 279 0 6 22 0 53 22 24 1 19 0 0 1
3 0 1 332 1 407 130 338 27 297 36 202 95 99 38 132 0 0 7
4 577 1 0 6 0 0 0 2 0 0 0 2 2 42 0 0 0 75
5 1 0 0 0 0 15 0 258 0 0 9 9 2 13 0 0 0 9
6 0 16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 77 0
7 0 566 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
8 0 0 142 0 32 4 69 4 11 292 24 59 4 2 31 0 0 0
9 0 13 0 0 0 0 0 0 0 0 0 0 0 0 0 173 0 0
10 43 0 0 0 0 0 0 1 0 0 0 0 0 5 0 0 0 3
11 0 0 0 228 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12 0 0 0 204 0 0 0 0 0 0 0 0 0 0 0 0 0 0
13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 48 0
14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 14 0
15 0 0 0 0 0 4 0 1 3 0 0 0 66 3 3 0 0 0
16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0
'PBMC2 - contingency_matrix (rows: cellTypist - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
1 0 0 0 0 0 1 0 2 1 0 15 2 57 5 0 148 0 0 0 0
2 0 0 0 0 0 0 131 200 35 2 2 0 1 55 0 1 0 0 0 0
3 21 575 490 0 0 0 230 107 248 286 10 2 48 98 0 26 0 0 0 1
4 0 0 0 0 471 8 0 1 0 0 2 222 0 0 0 2 1 0 0 0
5 0 0 0 0 0 0 1 1 0 0 253 0 3 52 0 6 0 0 0 0
6 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 88 0 0
7 0 0 0 464 0 0 0 0 0 0 0 0 0 0 15 0 88 0 0 0
8 558 2 0 0 0 0 1 0 0 0 0 0 112 1 0 0 0 0 0 0
9 0 0 0 12 0 0 0 0 0 0 0 0 0 0 174 0 0 0 0 0
10 0 0 0 0 0 0 0 0 0 0 0 50 0 2 0 0 0 0 0 0
11 0 0 0 0 0 228 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 204 0 0 0 0 0 0 0 0 0 0 0 0 0 0
13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 48 0
14 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 13
15 0 0 0 0 0 0 10 44 17 1 1 1 2 3 0 1 0 0 0 0
16 0 0 0 0 0 0 0 1 5 0 0 0 0 0 0 0 0 0 0 0
'PBMC2 - contingency_matrix (rows: cellTypist - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
1 0 0 0 0 1 0 217 0 0 0 7 0 3 0 3 0 0 0 0
2 348 2 0 0 0 4 7 0 2 3 60 0 1 0 0 0 0 0 0
3 469 4 1 15 1 412 69 356 324 322 165 0 4 0 0 0 0 0 0
4 1 667 1 0 8 0 6 0 0 0 0 0 0 0 1 23 0 0 0
5 10 2 0 0 0 0 7 0 1 0 65 0 159 0 72 0 0 0 0
6 0 0 9 0 0 0 0 0 0 0 0 0 0 80 0 0 4 0 0
7 0 0 556 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 9
8 1 0 1 561 0 2 99 1 2 2 5 0 0 0 0 0 0 0 0
9 0 0 13 0 0 0 0 0 0 0 0 173 0 0 0 0 0 0 0
10 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 47 0 0 0
11 0 0 0 0 228 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 204 0 0 0 0 0 0 0 0 0 0 0 0 0 0
13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 48 0 0
14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 14 0
15 61 0 0 0 0 3 2 1 1 3 9 0 0 0 0 0 0 0 0
16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0
'PBMC2 - contingency_matrix (rows: cellTypist - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
1 2 0 0 0 0 0 0 0 7 1 0 1 41 178 0 0 1 0 0 0
2 389 0 0 0 0 0 0 1 10 3 0 23 1 0 0 0 0 0 0 0
3 259 0 578 568 15 0 1 290 188 0 2 169 62 8 0 0 2 0 0 0
4 0 635 0 0 0 0 7 0 2 0 0 0 0 3 0 1 59 0 0 0
5 8 0 0 0 0 0 0 0 1 268 0 32 1 6 0 0 0 0 0 0
6 0 0 0 0 0 11 0 0 0 0 0 0 0 0 0 0 0 82 0 0
7 0 0 0 0 0 305 0 0 0 0 262 0 0 0 0 0 0 0 0 0
8 1 0 8 0 541 0 0 0 1 0 0 3 119 1 0 0 0 0 0 0
9 0 0 0 0 0 8 0 0 0 0 6 0 0 0 172 0 0 0 0 0
10 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 50 0 0 0
11 0 0 0 0 0 0 83 0 0 0 0 0 0 0 0 145 0 0 0 0
12 0 0 0 0 0 0 201 0 0 1 0 0 0 0 0 2 0 0 0 0
13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 48 0
14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 14
15 1 0 0 0 0 0 0 0 76 0 0 0 0 2 0 0 1 0 0 0
16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6
'PBMC2 - contingency_matrix (rows: cellTypist - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
1 0 0 0 3 4 220 3 0 0 0 1 0 0 0 0 0 0
2 0 0 0 416 9 0 2 0 0 0 0 0 0 0 0 0 0
3 0 2 1186 847 31 72 3 0 0 0 0 0 0 0 0 0 1
4 639 56 0 0 2 3 0 0 0 0 0 0 0 0 1 5 1
5 0 0 0 39 0 7 270 0 0 0 0 0 0 0 0 0 0
6 0 0 0 0 0 0 0 0 0 86 0 7 0 0 0 0 0
7 1 0 0 0 0 0 0 0 0 0 88 300 174 0 0 3 1
8 0 0 568 9 1 96 0 0 0 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 0 154 24 0 0 8 0 0 0 0 0
10 1 51 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
11 0 0 0 0 0 0 0 0 0 0 0 0 0 0 145 69 14
12 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 22 180
13 0 0 0 0 0 0 0 0 0 0 0 0 0 48 0 0 0
14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 13 1
15 0 1 0 6 72 1 0 0 0 0 0 0 0 0 0 0 0
16 0 0 0 1 0 1 0 0 0 0 1 0 3 0 0 0 0
print_clustering_data(tuning = 'celltypist',dataset="PBMC3")
'Initial COTAN cluster number:'
23
'Initial monocle cluster number:'
23
'Initial scanpy cluster number:'
17
'Initial scvi-tools cluster number:'
18
'Initial seurat cluster number:'
20
'PBMC3 - contingency_matrix (rows: cellTypist - cols: monocle)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
1 860 0 0 0 38 559 313 338 231 324 158 0 195 0 0 5 0
2 0 1041 0 0 0 0 1 0 0 0 0 390 0 2 7 0 31
3 0 0 0 654 0 0 2 2 1 0 0 0 1 0 0 0 2
4 400 0 0 0 12 309 29 47 12 157 78 0 56 0 0 0 0
5 0 22 1036 33 22 0 0 0 0 0 3 2 1 0 2 121 0
6 0 11 0 0 0 0 0 0 0 0 0 1 0 0 144 0 0
7 0 0 0 0 134 1 247 305 248 0 135 1 42 0 0 0 0
8 0 0 3 0 435 0 3 7 0 0 12 0 20 0 0 4 0
9 0 0 0 396 0 0 0 0 0 0 0 0 0 0 0 0 0
10 0 73 0 0 0 0 0 0 0 0 0 4 0 328 2 0 1
11 0 0 8 0 277 2 21 26 1 0 47 0 46 0 0 2 0
12 0 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0
13 11 0 0 0 0 8 157 9 19 1 7 0 21 0 0 0 0
14 0 0 79 0 12 0 0 0 0 0 0 0 0 0 1 19 0
15 1 0 0 0 0 0 0 3 0 0 0 0 0 0 16 0 0
16 0 0 0 0 0 0 0 0 0 0 0 11 0 0 0 0 0
17 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 0 0
18 0 0 0 0 0 0 0 0 0 0 0 0 0 0 57 0 0
19 0 0 0 0 0 0 0 0 0 0 0 0 0 0 12 0 0
'PBMC3 - contingency_matrix (rows: cellTypist - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1 1670 0 0 212 0 32 234 621 23 228 0 0 1 0 0 0 0 0
2 0 1228 0 0 0 0 0 0 0 0 2 202 0 0 5 0 35 0
3 0 0 0 0 543 0 5 0 0 1 0 0 0 111 0 0 2 0
4 29 0 0 0 0 811 192 5 4 59 0 0 0 0 0 0 0 0
5 0 0 1028 0 0 0 0 0 7 0 0 29 145 33 0 0 0 0
6 0 7 0 0 0 0 0 0 0 0 0 0 0 0 149 0 0 0
7 1 0 0 702 0 0 230 99 7 73 0 1 0 0 0 0 0 0
8 0 0 3 0 0 0 47 0 433 0 0 0 1 0 0 0 0 0
9 0 0 0 0 395 0 0 0 0 0 0 0 0 1 0 0 0 0
10 0 94 0 0 0 0 0 0 0 0 311 1 0 0 1 0 1 0
11 0 0 2 0 0 0 116 0 290 21 0 0 1 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0
13 16 0 0 44 0 0 10 55 1 107 0 0 0 0 0 0 0 0
14 0 0 11 0 0 0 4 0 5 1 0 0 90 0 0 0 0 0
15 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 20
16 0 0 0 0 0 0 0 0 0 0 0 11 0 0 0 0 0 0
17 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 0 0 0
18 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 57 0 0
19 0 0 0 0 0 0 0 0 0 0 0 0 0 12 0 0 0 0
'PBMC3 - contingency_matrix (rows: cellTypist - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
1 2 657 1415 2 2 43 614 91 5 0 64 0 14 49 0 0 32 31 0 0
2 1463 0 0 0 0 0 0 0 0 1 0 8 0 0 0 0 0 0 0 0
3 4 0 1 0 654 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0
4 0 6 13 1 0 856 4 168 0 0 0 0 0 52 0 0 0 0 0 0
5 29 0 0 1172 32 0 0 2 0 0 1 0 0 0 5 0 0 0 1 0
6 9 0 0 0 0 0 0 0 0 0 0 147 0 0 0 0 0 0 0 0
7 1 827 5 0 0 1 187 35 22 0 0 0 24 8 0 0 1 2 0 0
8 0 30 0 2 0 1 3 12 305 0 131 0 0 0 0 0 0 0 0 0
9 0 0 0 0 396 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
10 83 0 0 0 0 0 0 0 0 324 0 1 0 0 0 0 0 0 0 0
11 0 15 0 2 1 3 1 399 3 0 5 0 0 0 1 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8
13 0 49 21 0 0 0 22 6 0 0 0 0 131 2 0 0 2 0 0 0
14 0 0 0 31 0 0 0 0 0 0 0 0 0 0 80 0 0 0 0 0
15 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 20 0
16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 0 0 0
17 0 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0
18 0 0 0 0 0 0 0 0 0 0 0 5 0 0 0 52 0 0 0 0
19 0 0 0 0 0 0 0 0 0 0 12 0 0 0 0 0 0 0 0 0
'PBMC3 - contingency_matrix (rows: cellTypist - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1 1744 412 0 0 681 20 161 0 3 0 0 0 0 0 0 0 0 0
2 0 0 1011 0 0 0 0 0 0 0 0 227 0 231 3 0 0 0
3 0 0 0 0 0 0 2 535 0 2 0 1 2 0 0 120 0 0
4 16 6 0 0 3 886 188 0 0 0 0 0 0 0 0 1 0 0
5 0 1 4 1043 0 0 2 1 1 0 0 24 136 0 0 30 0 0
6 0 0 7 0 0 0 0 0 0 0 0 0 0 0 149 0 0 0
7 0 995 0 0 93 0 18 0 7 0 0 0 0 0 0 0 0 0
8 0 18 0 3 0 0 8 0 454 0 0 0 1 0 0 0 0 0
9 0 0 0 0 0 0 0 57 0 336 0 0 0 0 0 3 0 0
10 0 0 89 0 0 0 0 0 0 0 314 1 0 4 0 0 0 0
11 0 14 0 0 0 2 411 0 3 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 0 0 7 0 0 0 0 0 0 0 1 0 0
13 13 8 0 0 205 0 7 0 0 0 0 0 0 0 0 0 0 0
14 0 0 0 2 0 0 0 0 0 0 0 0 109 0 0 0 0 0
15 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 20
16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11
17 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 0 0 0
18 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 52 0
19 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 12
'PBMC3 - contingency_matrix (rows: cellTypist - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 ... 14 15 16 17 18 19 20 21 22 23
1 1822 80 633 127 291 53 12 0 0 3 ... 0 0 0 0 0 0 0 0 0 0
2 0 0 1 0 0 0 0 1 1 0 ... 68 517 404 324 0 0 0 1 0 0
3 1 0 2 2 0 2 0 0 0 0 ... 0 0 0 0 9 136 387 108 15 0
4 482 430 156 9 5 17 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
5 0 0 0 1 0 138 2 28 281 755 ... 0 1 0 0 0 25 5 0 0 0
6 0 0 0 0 0 0 0 0 0 0 ... 5 1 4 0 0 0 0 0 0 0
7 0 0 87 955 53 11 7 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
8 0 0 1 16 1 8 454 0 3 1 ... 0 0 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 359 2 34 0 1 0
10 0 0 0 0 0 0 0 0 0 0 ... 82 31 91 0 0 0 0 0 0 0
11 5 1 86 4 11 29 255 0 24 15 ... 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 5 3 0 0 0
13 7 0 210 5 1 10 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
14 0 0 0 1 0 10 0 0 100 0 ... 0 0 0 0 0 0 0 0 0 0
15 0 0 3 0 0 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 16 0
16 0 0 0 0 0 0 0 0 0 0 ... 0 11 0 0 0 0 0 0 0 0
17 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
18 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 52
19 0 0 0 0 0 1 0 0 0 1 ... 0 0 0 0 0 1 1 0 8 0

19 rows × 23 columns

print_clustering_data(tuning = 'celltypist',dataset="PBMC4")
'Initial COTAN cluster number:'
15
'Initial monocle cluster number:'
21
'Initial scanpy cluster number:'
16
'Initial scvi-tools cluster number:'
18
'Initial seurat cluster number:'
18
'PBMC4 - contingency_matrix (rows: cellTypist - cols: monocle)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
1 0 0 9 0 0 0 0 12 0 385 1 0 0 0 0 0
2 756 2 1 0 0 0 0 2 0 3 3 0 0 0 0 41
3 0 707 149 0 0 0 379 0 3 0 93 0 0 0 0 0
4 0 0 1 0 0 0 0 98 0 8 1 0 0 0 0 0
5 5 2 2 648 642 406 0 1 422 3 2 3 35 0 29 0
6 0 196 83 0 0 0 19 1 0 0 9 0 0 0 0 0
7 0 0 51 0 0 0 0 9 0 15 2 0 0 0 0 0
8 0 2 417 0 0 0 77 0 0 5 37 0 0 0 0 0
9 0 0 0 0 0 0 0 351 1 3 4 0 0 0 0 0
10 0 0 0 2 3 76 0 0 5 0 0 219 2 0 0 0
11 220 0 0 0 0 0 0 0 1 0 1 0 0 0 0 2
12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 28 0
13 1 2 2 0 0 0 0 0 0 4 0 0 0 4 0 0
14 0 4 4 0 0 0 1 0 0 0 97 0 0 0 0 0
15 0 0 0 11 5 0 0 0 1 0 0 1 72 3 0 0
16 0 0 0 0 0 0 0 0 0 0 0 0 0 59 0 0
'PBMC4 - contingency_matrix (rows: cellTypist - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1 0 10 0 0 0 0 385 5 1 0 0 0 0 1 5 0 0 0
2 0 0 0 673 0 0 0 0 1 134 0 0 0 0 0 0 0 0
3 0 346 0 0 509 387 1 10 0 0 0 0 0 0 0 78 0 0
4 0 0 0 0 0 0 6 7 10 0 0 0 0 0 85 0 0 0
5 928 1 822 0 0 0 0 0 0 0 1 181 78 142 0 0 47 0
6 0 7 0 0 5 2 0 286 0 0 0 0 0 0 0 8 0 0
7 0 3 0 0 0 0 1 67 0 0 0 0 0 0 6 0 0 0
8 0 483 0 0 8 43 1 2 0 0 0 0 0 0 0 1 0 0
9 0 1 0 0 0 0 5 0 348 0 0 0 0 0 5 0 0 0
10 74 0 2 0 0 0 0 0 0 0 216 10 2 2 0 0 1 0
11 0 0 0 46 0 0 0 0 0 177 0 0 1 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 28 0
13 0 13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
14 0 68 0 0 1 22 1 2 0 0 0 0 0 0 0 12 0 0
15 0 0 0 0 0 0 0 0 0 0 0 2 89 2 0 0 0 0
16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 59
'PBMC4 - contingency_matrix (rows: cellTypist - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1 3 0 0 0 0 0 0 0 313 0 0 1 0 0 88 2 0 0
2 1 1 0 0 687 0 1 1 10 0 104 2 0 0 0 1 0 0
3 439 840 1 0 0 0 0 21 0 0 0 0 30 0 0 0 0 0
4 0 0 0 0 0 0 7 1 0 0 0 100 0 0 0 0 0 0
5 1 1 776 699 2 508 15 0 0 65 2 1 0 24 2 84 0 20
6 10 8 0 0 0 0 0 287 1 0 0 1 1 0 0 0 0 0
7 5 0 0 0 0 0 0 33 5 0 0 34 0 0 0 0 0 0
8 507 15 0 0 0 0 1 3 2 0 0 0 10 0 0 0 0 0
9 0 0 0 0 0 0 358 0 0 0 0 0 1 0 0 0 0 0
10 0 0 13 1 0 27 0 0 0 265 0 0 0 0 0 1 0 0
11 0 0 0 0 5 0 0 0 0 0 219 0 0 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 28
13 6 0 0 0 0 0 0 6 1 0 0 0 0 0 0 0 0 0
14 16 5 0 0 0 0 0 0 0 0 0 4 81 0 0 0 0 0
15 0 0 2 0 0 1 1 0 0 0 0 0 0 89 0 0 0 0
16 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 57 0
'PBMC4 - contingency_matrix (rows: cellTypist - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
1 5 0 0 0 0 0 398 0 0 4 0 0 0 0 0 0 0 0 0
2 0 0 0 0 632 0 1 0 122 1 0 0 0 0 0 0 0 52 0
3 361 843 0 0 0 0 0 0 0 2 122 0 3 0 0 0 0 0 0
4 0 0 0 0 0 0 0 1 0 107 0 0 0 0 0 0 0 0 0
5 2 0 771 732 0 447 2 0 2 0 1 1 0 136 28 35 0 0 43
6 31 15 0 0 0 0 0 0 0 68 5 0 189 0 0 0 0 0 0
7 3 0 0 0 0 0 3 0 0 69 2 0 0 0 0 0 0 0 0
8 525 0 0 0 0 0 1 0 0 2 10 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 3 354 1 0 1 0 0 0 0 0 0 0 0
10 0 0 0 7 0 86 0 0 0 0 0 213 0 1 0 0 0 0 0
11 0 0 0 0 1 0 0 0 223 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 28 0 0 0
13 10 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 0
14 4 0 0 0 0 0 0 0 0 1 101 0 0 0 0 0 0 0 0
15 0 0 0 1 0 1 0 0 0 0 0 0 0 0 90 0 0 0 1
16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 59 0 0
'PBMC4 - contingency_matrix (rows: cellTypist - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
1 0 0 0 0 0 0 4 0 1 402 0 0 0 0 0
2 0 0 0 0 0 0 0 1 1 1 47 344 414 0 0
3 0 0 0 0 0 0 367 963 1 0 0 0 0 0 0
4 0 0 0 0 0 0 3 0 105 0 0 0 0 0 0
5 131 724 531 741 67 1 1 0 0 2 0 1 1 0 0
6 0 0 0 0 0 0 99 209 0 0 0 0 0 0 0
7 0 0 0 0 0 0 70 0 3 4 0 0 0 0 0
8 0 0 0 0 0 0 530 7 0 1 0 0 0 0 0
9 0 0 0 0 0 0 2 0 356 1 0 0 0 0 0
10 1 6 83 2 0 215 0 0 0 0 0 0 0 0 0
11 0 0 0 0 0 0 0 0 0 0 24 199 1 0 0
12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 28
13 0 0 0 0 1 0 1 1 8 0 1 1 0 0 0
14 0 0 0 0 0 0 105 1 0 0 0 0 0 0 0
15 0 1 1 0 91 0 0 0 0 0 0 0 0 0 0
16 0 0 0 0 0 0 0 0 0 0 0 0 0 59 0

Against antibody cluster number

print_clustering_data(tuning = 'antibody',dataset="PBMC1")
'Initial COTAN cluster number:'
11
'Initial monocle cluster number:'
11
'Initial scanpy cluster number:'
9
'Initial scvi-tools cluster number:'
11
'Initial seurat cluster number:'
10
'Initial antibody cell/cluster table:'
cluster.ids
7     1338
8      876
3      748
4      341
9      331
5      211
1      202
6      131
2       62
10      51
12      16
Name: count, dtype: int64
'PBMC1 - contingency_matrix (rows: antibody - cols: monocle)'
1 2 3 4 5 6 7 8 9
1 94 0 17 50 0 0 0 0 0
2 0 4 0 0 3 0 38 1 0
3 35 0 500 65 0 0 0 0 0
4 0 1 0 262 0 0 0 0 0
5 2 0 29 127 0 0 0 0 0
6 0 1 0 0 0 1 1 58 26
7 2 731 0 4 0 275 95 17 1
8 776 0 30 6 1 0 0 0 1
9 0 0 0 1 294 0 0 0 0
10 0 0 0 44 0 0 0 0 0
12 0 0 0 10 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 11
1 22 0 7 60 0 0 0 0 0 0 72
2 0 0 0 0 5 0 2 1 0 38 0
3 67 0 408 125 0 0 0 0 0 0 0
4 0 1 0 13 0 249 0 0 0 0 0
5 2 0 10 146 0 0 0 0 0 0 0
6 3 0 1 0 0 0 0 0 82 1 0
7 2 702 1 0 348 0 0 0 20 52 0
8 780 0 21 6 0 1 1 0 1 0 4
9 0 0 0 1 0 1 156 137 0 0 0
10 0 0 0 37 0 7 0 0 0 0 0
12 0 0 0 0 0 10 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10
1 28 0 0 22 0 1 36 0 74 0
2 0 3 42 0 1 0 0 0 0 0
3 123 0 0 434 0 0 43 0 0 0
4 0 1 0 0 0 259 3 0 0 0
5 0 0 0 17 0 0 138 0 3 0
6 0 1 1 0 0 0 0 56 3 26
7 1 644 451 0 0 1 0 27 0 1
8 786 0 0 19 1 2 3 0 2 1
9 0 0 0 0 294 1 0 0 0 0
10 0 0 0 1 0 9 34 0 0 0
12 0 0 0 0 0 10 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11
1 93 14 0 0 0 54 0 0 0 0 0
2 0 0 1 4 0 0 1 1 39 0 0
3 22 536 0 0 0 42 0 0 0 0 0
4 0 0 1 0 261 1 0 0 0 0 0
5 1 18 0 0 0 139 0 0 0 0 0
6 3 1 0 1 0 0 0 0 1 56 25
7 2 1 524 488 2 0 0 0 89 18 1
8 766 40 0 0 1 5 1 0 0 0 1
9 0 0 0 0 2 0 151 142 0 0 0
10 0 0 0 0 8 36 0 0 0 0 0
12 0 0 0 0 8 2 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 11
1 0 0 0 0 0 0 0 36 54 12 59
2 1 1 40 2 0 0 2 0 0 0 0
3 0 0 0 0 0 0 0 0 39 528 33
4 1 0 0 0 0 0 0 48 214 0 0
5 0 0 0 0 0 0 0 1 135 22 0
6 0 56 1 1 1 25 0 3 0 0 0
7 33 17 118 668 284 1 0 0 1 1 2
8 0 0 0 0 0 1 1 9 4 34 765
9 0 0 0 0 0 0 293 0 2 0 0
10 0 0 0 0 0 0 0 4 40 0 0
12 0 0 0 0 0 0 0 4 6 0 0
print_clustering_data(tuning = 'antibody',dataset="PBMC1")
'Initial COTAN cluster number:'
11
'Initial monocle cluster number:'
11
'Initial scanpy cluster number:'
9
'Initial scvi-tools cluster number:'
11
'Initial seurat cluster number:'
10
'Initial antibody cell/cluster table:'
cluster.ids
7     1338
8      876
3      748
4      341
9      331
5      211
1      202
6      131
2       62
10      51
12      16
Name: count, dtype: int64
'PBMC1 - contingency_matrix (rows: antibody - cols: monocle)'
1 2 3 4 5 6 7 8 9
1 94 0 17 50 0 0 0 0 0
2 0 4 0 0 3 0 38 1 0
3 35 0 500 65 0 0 0 0 0
4 0 1 0 262 0 0 0 0 0
5 2 0 29 127 0 0 0 0 0
6 0 1 0 0 0 1 1 58 26
7 2 731 0 4 0 275 95 17 1
8 776 0 30 6 1 0 0 0 1
9 0 0 0 1 294 0 0 0 0
10 0 0 0 44 0 0 0 0 0
12 0 0 0 10 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 11
1 22 0 7 60 0 0 0 0 0 0 72
2 0 0 0 0 5 0 2 1 0 38 0
3 67 0 408 125 0 0 0 0 0 0 0
4 0 1 0 13 0 249 0 0 0 0 0
5 2 0 10 146 0 0 0 0 0 0 0
6 3 0 1 0 0 0 0 0 82 1 0
7 2 702 1 0 348 0 0 0 20 52 0
8 780 0 21 6 0 1 1 0 1 0 4
9 0 0 0 1 0 1 156 137 0 0 0
10 0 0 0 37 0 7 0 0 0 0 0
12 0 0 0 0 0 10 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10
1 28 0 0 22 0 1 36 0 74 0
2 0 3 42 0 1 0 0 0 0 0
3 123 0 0 434 0 0 43 0 0 0
4 0 1 0 0 0 259 3 0 0 0
5 0 0 0 17 0 0 138 0 3 0
6 0 1 1 0 0 0 0 56 3 26
7 1 644 451 0 0 1 0 27 0 1
8 786 0 0 19 1 2 3 0 2 1
9 0 0 0 0 294 1 0 0 0 0
10 0 0 0 1 0 9 34 0 0 0
12 0 0 0 0 0 10 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11
1 93 14 0 0 0 54 0 0 0 0 0
2 0 0 1 4 0 0 1 1 39 0 0
3 22 536 0 0 0 42 0 0 0 0 0
4 0 0 1 0 261 1 0 0 0 0 0
5 1 18 0 0 0 139 0 0 0 0 0
6 3 1 0 1 0 0 0 0 1 56 25
7 2 1 524 488 2 0 0 0 89 18 1
8 766 40 0 0 1 5 1 0 0 0 1
9 0 0 0 0 2 0 151 142 0 0 0
10 0 0 0 0 8 36 0 0 0 0 0
12 0 0 0 0 8 2 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 11
1 0 0 0 0 0 0 0 36 54 12 59
2 1 1 40 2 0 0 2 0 0 0 0
3 0 0 0 0 0 0 0 0 39 528 33
4 1 0 0 0 0 0 0 48 214 0 0
5 0 0 0 0 0 0 0 1 135 22 0
6 0 56 1 1 1 25 0 3 0 0 0
7 33 17 118 668 284 1 0 0 1 1 2
8 0 0 0 0 0 1 1 9 4 34 765
9 0 0 0 0 0 0 293 0 2 0 0
10 0 0 0 0 0 0 0 4 40 0 0
12 0 0 0 0 0 0 0 4 6 0 0
print_clustering_data(tuning = 'antibody',dataset="PBMC1")
'Initial COTAN cluster number:'
11
'Initial monocle cluster number:'
11
'Initial scanpy cluster number:'
9
'Initial scvi-tools cluster number:'
11
'Initial seurat cluster number:'
10
'Initial antibody cell/cluster table:'
cluster.ids
7     1338
8      876
3      748
4      341
9      331
5      211
1      202
6      131
2       62
10      51
12      16
Name: count, dtype: int64
'PBMC1 - contingency_matrix (rows: antibody - cols: monocle)'
1 2 3 4 5 6 7 8 9
1 94 0 17 50 0 0 0 0 0
2 0 4 0 0 3 0 38 1 0
3 35 0 500 65 0 0 0 0 0
4 0 1 0 262 0 0 0 0 0
5 2 0 29 127 0 0 0 0 0
6 0 1 0 0 0 1 1 58 26
7 2 731 0 4 0 275 95 17 1
8 776 0 30 6 1 0 0 0 1
9 0 0 0 1 294 0 0 0 0
10 0 0 0 44 0 0 0 0 0
12 0 0 0 10 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 11
1 22 0 7 60 0 0 0 0 0 0 72
2 0 0 0 0 5 0 2 1 0 38 0
3 67 0 408 125 0 0 0 0 0 0 0
4 0 1 0 13 0 249 0 0 0 0 0
5 2 0 10 146 0 0 0 0 0 0 0
6 3 0 1 0 0 0 0 0 82 1 0
7 2 702 1 0 348 0 0 0 20 52 0
8 780 0 21 6 0 1 1 0 1 0 4
9 0 0 0 1 0 1 156 137 0 0 0
10 0 0 0 37 0 7 0 0 0 0 0
12 0 0 0 0 0 10 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10
1 28 0 0 22 0 1 36 0 74 0
2 0 3 42 0 1 0 0 0 0 0
3 123 0 0 434 0 0 43 0 0 0
4 0 1 0 0 0 259 3 0 0 0
5 0 0 0 17 0 0 138 0 3 0
6 0 1 1 0 0 0 0 56 3 26
7 1 644 451 0 0 1 0 27 0 1
8 786 0 0 19 1 2 3 0 2 1
9 0 0 0 0 294 1 0 0 0 0
10 0 0 0 1 0 9 34 0 0 0
12 0 0 0 0 0 10 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11
1 93 14 0 0 0 54 0 0 0 0 0
2 0 0 1 4 0 0 1 1 39 0 0
3 22 536 0 0 0 42 0 0 0 0 0
4 0 0 1 0 261 1 0 0 0 0 0
5 1 18 0 0 0 139 0 0 0 0 0
6 3 1 0 1 0 0 0 0 1 56 25
7 2 1 524 488 2 0 0 0 89 18 1
8 766 40 0 0 1 5 1 0 0 0 1
9 0 0 0 0 2 0 151 142 0 0 0
10 0 0 0 0 8 36 0 0 0 0 0
12 0 0 0 0 8 2 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 11
1 0 0 0 0 0 0 0 36 54 12 59
2 1 1 40 2 0 0 2 0 0 0 0
3 0 0 0 0 0 0 0 0 39 528 33
4 1 0 0 0 0 0 0 48 214 0 0
5 0 0 0 0 0 0 0 1 135 22 0
6 0 56 1 1 1 25 0 3 0 0 0
7 33 17 118 668 284 1 0 0 1 1 2
8 0 0 0 0 0 1 1 9 4 34 765
9 0 0 0 0 0 0 293 0 2 0 0
10 0 0 0 0 0 0 0 4 40 0 0
12 0 0 0 0 0 0 0 4 6 0 0

Default parameters

print_scores(tuning = 'default',dataset="PBMC1")
'PBMC1 - number of clusters'
monocle scanpy scvi-tools seurat COTAN
0 3 18 13 11 14
'PBMC1 - Silhuette (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 0.106025 0.062012 0.087622 0.168956 0.155843
'PBMC1 - Calinski_Harabasz (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 194.710746 159.568549 193.151278 235.185139 195.201252
'PBMC1 - davies_bouldin (lower is better)'
monocle scanpy scvi-tools seurat COTAN
0 3.046493 2.534291 2.547137 1.695538 2.147008
'PBMC1 - Silhuette from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 0.182454 -0.006178 0.148873 0.235918 0.184544
'PBMC1 - Calinski_Harabasz from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 187.272904 160.02968 199.227613 213.845901 190.312532
'PBMC1 - davies_bouldin  from Prob. (lower is better)'
monocle scanpy scvi-tools seurat COTAN
0 2.582264 2.79635 2.842419 1.973727 2.274669
'PBMC1 - default labels'
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.578257 0.384609 0.410140 0.979930 0.602512 0.366057 0.991705
scanpy 0.721042 0.404607 0.824980 0.640363 0.508176 0.787276 0.328021
scvi-tools 0.776232 0.599664 0.809790 0.745344 0.666244 0.811382 0.547068
seurat 0.793630 0.649593 0.784165 0.803327 0.705921 0.747450 0.666699
COTAN 0.787289 0.670392 0.803876 0.771373 0.723485 0.796154 0.657448
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.622344 0.439255 0.458549 0.968180 0.645589 0.421345 0.989178
scanpy 0.662480 0.389320 0.814398 0.558329 0.511739 0.851844 0.307424
scvi-tools 0.718265 0.557951 0.800919 0.651075 0.643426 0.842101 0.491625
seurat 0.747924 0.647338 0.787235 0.712353 0.712527 0.810669 0.626267
COTAN 0.727398 0.637277 0.792854 0.671926 0.705427 0.834262 0.596488
print_scores(tuning = 'default',dataset="PBMC2")
'PBMC2 - number of clusters'
monocle scanpy scvi-tools seurat COTAN
0 2 18 20 14 17
'PBMC2 - Silhuette (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 0.237524 0.077322 0.018324 0.134064 0.140061
'PBMC2 - Calinski_Harabasz (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 298.25227 270.502074 223.039427 367.295749 296.064689
'PBMC2 - davies_bouldin (lower is better)'
monocle scanpy scvi-tools seurat COTAN
0 3.89379 2.581588 3.703433 1.958013 2.527846
'PBMC2 - Silhuette from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 0.283181 0.1987 0.064022 0.358299 0.274624
'PBMC2 - Calinski_Harabasz from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 284.036162 259.900464 223.875031 377.870193 294.416967
'PBMC2 - davies_bouldin  from Prob. (lower is better)'
monocle scanpy scvi-tools seurat COTAN
0 3.514194 2.322847 5.400931 1.992412 2.285385
'PBMC2 - default labels'
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.393166 0.207180 0.245998 0.978626 0.521364 0.272813 0.996358
scanpy 0.718820 0.457213 0.804000 0.649960 0.556684 0.814847 0.380314
scvi-tools 0.699788 0.424696 0.785920 0.630670 0.525031 0.762849 0.361352
seurat 0.775988 0.562430 0.819560 0.736815 0.640108 0.816395 0.501888
COTAN 0.729355 0.472800 0.745550 0.713848 0.562480 0.591410 0.534966
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.261544 0.093122 0.154169 0.861687 0.457589 0.212802 0.983954
scanpy 0.685644 0.523244 0.773965 0.615415 0.607208 0.814732 0.452543
scvi-tools 0.654391 0.485789 0.750936 0.579842 0.573975 0.779957 0.422391
seurat 0.748480 0.679377 0.793713 0.708124 0.734394 0.850648 0.634028
COTAN 0.703575 0.633882 0.697485 0.709773 0.704954 0.646778 0.768362
print_scores(tuning = 'default',dataset="PBMC3")
'PBMC3 - number of clusters'
monocle scanpy scvi-tools seurat COTAN
0 3 22 17 18 32
'PBMC3 - Silhuette (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 0.173831 0.017764 0.066172 0.12701 0.09951
'PBMC3 - Calinski_Harabasz (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 565.456442 389.223708 568.006153 568.200931 382.825491
'PBMC3 - davies_bouldin (lower is better)'
monocle scanpy scvi-tools seurat COTAN
0 3.238128 3.245809 2.168128 2.441035 2.751106
'PBMC3 - Silhuette from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 0.214085 0.065185 0.226855 0.282982 0.1924
'PBMC3 - Calinski_Harabasz from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 531.480656 382.798915 537.03678 586.377699 392.740241
'PBMC3 - davies_bouldin  from Prob. (lower is better)'
monocle scanpy scvi-tools seurat COTAN
0 2.634444 3.87979 2.321242 2.318734 3.32168
'PBMC3 - default labels'
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.500696 0.233560 0.338609 0.960446 0.500077 0.252454 0.990587
scanpy 0.685919 0.462762 0.763719 0.622505 0.541286 0.750873 0.390201
scvi-tools 0.738418 0.579677 0.757237 0.720511 0.635237 0.710070 0.568291
seurat 0.770512 0.585110 0.821173 0.725738 0.644073 0.789750 0.525267
COTAN 0.723833 0.527470 0.849029 0.630815 0.609217 0.882695 0.420469
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.469679 0.196545 0.316293 0.911906 0.473204 0.228265 0.980975
scanpy 0.675464 0.545251 0.756553 0.610075 0.612243 0.807482 0.464211
scvi-tools 0.721883 0.667226 0.738229 0.706244 0.710211 0.754868 0.668197
seurat 0.749240 0.667468 0.798924 0.705374 0.712865 0.824327 0.616475
COTAN 0.677769 0.535850 0.798894 0.588538 0.606241 0.820085 0.448159
print_scores(tuning = 'default',dataset="PBMC4")
'PBMC4 - number of clusters'
monocle scanpy scvi-tools seurat COTAN
0 3 22 16 19 24
'PBMC4 - Silhuette (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 0.081399 0.063742 0.075337 0.12954 0.11378
'PBMC4 - Calinski_Harabasz (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 364.985136 267.681245 341.396665 364.393784 293.065775
'PBMC4 - davies_bouldin (lower is better)'
monocle scanpy scvi-tools seurat COTAN
0 3.354088 2.496024 2.226 2.224448 2.33448
'PBMC4 - Silhuette from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 0.193766 0.025023 0.077663 0.187532 0.030818
'PBMC4 - Calinski_Harabasz from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 353.871309 254.540593 284.048471 347.979408 255.247203
'PBMC4 - davies_bouldin  from Prob. (lower is better)'
monocle scanpy scvi-tools seurat COTAN
0 2.775993 3.467425 2.808762 2.299231 3.246869
'PBMC4 - default labels'
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.617025 0.470070 0.453383 0.965513 0.647279 0.425154 0.985455
scanpy 0.701228 0.380357 0.819943 0.612541 0.487560 0.777350 0.305802
scvi-tools 0.739299 0.504966 0.788229 0.696088 0.584900 0.745208 0.459077
seurat 0.760207 0.494746 0.847372 0.689301 0.583823 0.820228 0.415555
COTAN 0.716555 0.435917 0.808134 0.643618 0.526063 0.714778 0.387173
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.636102 0.525318 0.482349 0.933740 0.689338 0.484673 0.980428
scanpy 0.642165 0.368291 0.789153 0.541335 0.491476 0.822336 0.293735
scvi-tools 0.698167 0.482058 0.770861 0.638003 0.577373 0.767464 0.434365
seurat 0.691539 0.444871 0.803112 0.607186 0.550863 0.803654 0.377588
COTAN 0.625477 0.351843 0.733504 0.545185 0.461853 0.646787 0.329796

Matching cellTypist clusters number

print_scores(tuning = 'celltypist',dataset="PBMC1")
'PBMC1 - number of clusters'
monocle scanpy scvi-tools seurat COTAN
0 18 17 20 21 14
'PBMC1 - Silhuette (higher is better)'
monocle scanpy scvi-tools seurat COTAN celltypist
0 0.022324 0.070608 0.079342 0.108506 0.155843 0.116981
'PBMC1 - Calinski_Harabasz (higher is better)'
monocle scanpy scvi-tools seurat COTAN celltypist
0 128.949194 160.591477 164.988143 187.121919 195.201252 193.803845
'PBMC1 - davies_bouldin (lower is better)'
monocle scanpy scvi-tools seurat COTAN celltypist
0 2.806118 2.79721 2.797975 2.074196 2.147008 1.690698
'PBMC1 - Silhuette from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN celltypist
0 -0.045223 0.035629 -0.025941 0.041531 0.184544 0.436382
'PBMC1 - Calinski_Harabasz from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN celltypist
0 105.612108 166.682035 142.22761 162.068208 190.312532 254.936556
'PBMC1 - davies_bouldin  from Prob. (lower is better)'
monocle scanpy scvi-tools seurat COTAN celltypist
0 5.698571 3.900272 4.09973 3.674422 2.274669 1.469953
'PBMC1 - matching celltypist labels'
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.658065 0.341945 0.757164 0.581903 0.448601 0.714098 0.281814
scanpy 0.735830 0.459735 0.822412 0.665742 0.553086 0.794910 0.384828
scvi-tools 0.699950 0.375082 0.808964 0.616828 0.479652 0.748060 0.307550
seurat 0.730468 0.423069 0.849278 0.640820 0.527386 0.822925 0.337985
COTAN 0.787289 0.670392 0.803876 0.771373 0.723485 0.796154 0.657448
print_scores(tuning = 'celltypist',dataset="PBMC2")
'PBMC2 - number of clusters'
monocle scanpy scvi-tools seurat COTAN
0 18 20 19 20 17
'PBMC2 - Silhuette (higher is better)'
monocle scanpy scvi-tools seurat COTAN celltypist
0 -0.018715 0.041013 0.045461 0.084324 0.140061 0.153673
'PBMC2 - Calinski_Harabasz (higher is better)'
monocle scanpy scvi-tools seurat COTAN celltypist
0 174.411397 246.631074 268.231569 319.598571 296.064689 402.453835
'PBMC2 - davies_bouldin (lower is better)'
monocle scanpy scvi-tools seurat COTAN celltypist
0 3.376582 2.743121 3.804791 2.076454 2.527846 1.374222
'PBMC2 - Silhuette from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN celltypist
0 0.01541 0.107595 0.143462 0.181639 0.274624 0.430025
'PBMC2 - Calinski_Harabasz from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN celltypist
0 166.030172 236.1466 264.309361 321.631607 294.416967 456.336419
'PBMC2 - davies_bouldin  from Prob. (lower is better)'
monocle scanpy scvi-tools seurat COTAN celltypist
0 3.978911 3.444155 4.011805 2.517532 2.285385 1.14756
'PBMC2 - matching celltypist labels'
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.605942 0.312112 0.699644 0.534375 0.425421 0.696157 0.259975
scanpy 0.697287 0.377675 0.809335 0.612491 0.492889 0.812324 0.299067
scvi-tools 0.709450 0.398779 0.791930 0.642531 0.500807 0.730881 0.343158
seurat 0.738307 0.418942 0.850535 0.652244 0.529176 0.837949 0.334181
COTAN 0.729355 0.472800 0.745550 0.713848 0.562480 0.591410 0.534966
print_scores(tuning = 'celltypist',dataset="PBMC3")
'PBMC3 - number of clusters'
monocle scanpy scvi-tools seurat COTAN
0 17 18 20 18 23
'PBMC3 - Silhuette (higher is better)'
monocle scanpy scvi-tools seurat COTAN celltypist
0 -0.028636 0.05187 0.012342 0.127418 0.055352 0.143797
'PBMC3 - Calinski_Harabasz (higher is better)'
monocle scanpy scvi-tools seurat COTAN celltypist
0 294.452653 454.311295 471.87183 568.38436 398.465918 623.45755
'PBMC3 - davies_bouldin (lower is better)'
monocle scanpy scvi-tools seurat COTAN celltypist
0 4.102941 3.008669 2.201609 2.442825 2.650649 1.411931
'PBMC3 - Silhuette from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN celltypist
0 -0.0339 0.256035 0.182746 0.282975 0.087495 0.386958
'PBMC3 - Calinski_Harabasz from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN celltypist
0 262.303562 476.72854 558.156073 586.066802 354.838428 919.615311
'PBMC3 - davies_bouldin  from Prob. (lower is better)'
monocle scanpy scvi-tools seurat COTAN celltypist
0 11.445609 2.595482 2.392255 2.302532 2.872672 1.269121
'PBMC3 - matching celltypist labels'
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.593459 0.350206 0.643738 0.550465 0.432058 0.574653 0.324847
scanpy 0.712344 0.545918 0.758076 0.671816 0.609354 0.757298 0.490312
scvi-tools 0.735127 0.565025 0.767444 0.705423 0.623277 0.727251 0.534168
seurat 0.771047 0.586941 0.821567 0.726381 0.645653 0.790801 0.527147
COTAN 0.670200 0.459778 0.736074 0.615148 0.530378 0.652757 0.430943
print_scores(tuning = 'celltypist',dataset="PBMC4")
'PBMC4 - number of clusters'
monocle scanpy scvi-tools seurat COTAN
0 16 18 18 19 15
'PBMC4 - Silhuette (higher is better)'
monocle scanpy scvi-tools seurat COTAN celltypist
0 0.037346 0.061345 0.125126 0.129018 0.103684 0.090862
'PBMC4 - Calinski_Harabasz (higher is better)'
monocle scanpy scvi-tools seurat COTAN celltypist
0 297.000318 297.049456 372.487294 364.118805 331.149713 364.544605
'PBMC4 - davies_bouldin (lower is better)'
monocle scanpy scvi-tools seurat COTAN celltypist
0 2.555911 2.471675 1.925302 2.227526 2.857269 1.625159
'PBMC4 - Silhuette from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN celltypist
0 0.046459 0.09315 0.17079 0.186411 0.063028 0.425774
'PBMC4 - Calinski_Harabasz from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN celltypist
0 249.579567 283.799441 376.317639 348.894242 304.688587 498.170533
'PBMC4 - davies_bouldin  from Prob. (lower is better)'
monocle scanpy scvi-tools seurat COTAN celltypist
0 3.292834 3.340857 2.072035 2.298001 2.481324 1.095999
'PBMC4 - matching celltypist labels'
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.686019 0.421166 0.747399 0.633956 0.512728 0.700065 0.375523
scanpy 0.730100 0.473433 0.810168 0.664434 0.562407 0.778855 0.406111
scvi-tools 0.752718 0.500477 0.831022 0.687899 0.587099 0.808920 0.426105
seurat 0.759495 0.492528 0.846776 0.688525 0.581840 0.818184 0.413768
COTAN 0.724737 0.449720 0.766268 0.687477 0.534619 0.667192 0.428388

Matching antibody clusters number

print_scores(tuning = 'antibody',dataset="PBMC1")
'PBMC1 - number of clusters'
monocle scanpy scvi-tools seurat COTAN
0 9 11 10 11 11
'PBMC1 - Silhuette (higher is better)'
monocle scanpy scvi-tools seurat COTAN antibody
0 0.123097 0.097602 0.094258 0.171754 0.108557 0.069567
'PBMC1 - Calinski_Harabasz (higher is better)'
monocle scanpy scvi-tools seurat COTAN antibody
0 203.253687 193.550388 189.978034 237.429051 166.755584 131.570445
'PBMC1 - davies_bouldin (lower is better)'
monocle scanpy scvi-tools seurat COTAN antibody
0 2.027098 1.886001 1.890236 1.677632 2.486428 2.515129
'PBMC1 - Silhuette from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN antibody
0 0.101586 0.297043 0.261342 0.218776 0.106476 0.245087
'PBMC1 - Calinski_Harabasz from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN antibody
0 184.932102 202.80892 206.264825 214.61793 153.161104 131.570445
'PBMC1 - davies_bouldin  from Prob. (lower is better)'
monocle scanpy scvi-tools seurat COTAN antibody
0 2.395779 1.863548 1.700282 1.947515 2.755616 2.515129
'PBMC1 - matching antibody labels'
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.724319 0.641765 0.727281 0.721381 0.707943 0.753325 0.665295
scanpy 0.746106 0.652841 0.792721 0.704669 0.717629 0.829289 0.621003
scvi-tools 0.757587 0.658079 0.782127 0.734540 0.721084 0.800236 0.649760
seurat 0.749425 0.642110 0.790860 0.712116 0.708375 0.813318 0.616972
COTAN 0.716421 0.637108 0.744518 0.690367 0.703698 0.787585 0.628745
print_scores(tuning = 'antibody',dataset="PBMC2")
'PBMC2 - number of clusters'
monocle scanpy scvi-tools seurat COTAN
0 11 10 12 12 12
'PBMC2 - Silhuette (higher is better)'
monocle scanpy scvi-tools seurat COTAN antibody
0 -0.032335 0.052435 -0.009451 0.111841 0.085442 0.057103
'PBMC2 - Calinski_Harabasz (higher is better)'
monocle scanpy scvi-tools seurat COTAN antibody
0 196.870204 272.65961 193.762495 291.540798 222.099829 197.474522
'PBMC2 - davies_bouldin (lower is better)'
monocle scanpy scvi-tools seurat COTAN antibody
0 3.377139 2.347558 4.634488 1.849581 2.78642 2.980073
'PBMC2 - Silhuette from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN antibody
0 0.040432 0.266462 0.080701 0.359544 0.238423 0.23847
'PBMC2 - Calinski_Harabasz from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN antibody
0 195.298772 291.365112 211.716959 297.530428 239.08615 197.474522
'PBMC2 - davies_bouldin  from Prob. (lower is better)'
monocle scanpy scvi-tools seurat COTAN antibody
0 6.357385 2.018126 3.788174 1.490357 2.063593 2.980073
'PBMC2 - matching antibody labels'
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.590346 0.458018 0.610332 0.571627 0.544156 0.612308 0.483589
scanpy 0.746517 0.649454 0.756633 0.736667 0.707302 0.779740 0.641594
scvi-tools 0.672359 0.576869 0.708154 0.640009 0.646073 0.750157 0.556430
seurat 0.759142 0.760752 0.776572 0.742477 0.800906 0.838459 0.765035
COTAN 0.735282 0.673070 0.687591 0.790082 0.743686 0.649622 0.851370
print_scores(tuning = 'antibody',dataset="PBMC3")
'PBMC3 - number of clusters'
monocle scanpy scvi-tools seurat COTAN
0 12 14 13 14 12
'PBMC3 - Silhuette (higher is better)'
monocle scanpy scvi-tools seurat COTAN antibody
0 -0.040176 0.034398 0.001717 0.076119 0.066886 0.037871
'PBMC3 - Calinski_Harabasz (higher is better)'
monocle scanpy scvi-tools seurat COTAN antibody
0 303.562678 332.440157 368.948628 434.276887 338.97586 309.45087
'PBMC3 - davies_bouldin (lower is better)'
monocle scanpy scvi-tools seurat COTAN antibody
0 3.604809 3.434343 3.282334 2.612535 3.274719 3.04294
'PBMC3 - Silhuette from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN antibody
0 0.074904 0.23307 0.17138 0.305558 0.220006 0.205664
'PBMC3 - Calinski_Harabasz from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN antibody
0 298.489075 382.997054 438.502899 489.196185 393.696152 309.45087
'PBMC3 - davies_bouldin  from Prob. (lower is better)'
monocle scanpy scvi-tools seurat COTAN antibody
0 12.995929 2.882473 4.362136 1.884472 2.454523 3.04294
'PBMC3 - matching antibody labels'
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.644484 0.537094 0.639574 0.649469 0.598577 0.600917 0.596245
scanpy 0.729603 0.683244 0.752370 0.708173 0.724410 0.784386 0.669020
scvi-tools 0.726492 0.670063 0.728625 0.724372 0.713239 0.729596 0.697249
seurat 0.764843 0.698339 0.799673 0.732920 0.738860 0.829783 0.657901
COTAN 0.691237 0.607331 0.643954 0.746015 0.676860 0.575699 0.795798
print_scores(tuning = 'antibody',dataset="PBMC4")
'PBMC4 - number of clusters'
monocle scanpy scvi-tools seurat COTAN
0 13 11 11 13 12
'PBMC4 - Silhuette (higher is better)'
monocle scanpy scvi-tools seurat COTAN antibody
0 0.003249 0.048981 0.047987 0.075298 0.044429 -0.042083
'PBMC4 - Calinski_Harabasz (higher is better)'
monocle scanpy scvi-tools seurat COTAN antibody
0 235.748317 272.355681 313.161524 323.350677 198.84997 196.596426
'PBMC4 - davies_bouldin (lower is better)'
monocle scanpy scvi-tools seurat COTAN antibody
0 2.927763 2.81827 2.00597 2.506347 2.779589 4.520827
'PBMC4 - Silhuette from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN antibody
0 0.037 0.149413 0.138041 0.120031 0.068881 0.189162
'PBMC4 - Calinski_Harabasz from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN antibody
0 191.855254 247.966629 308.946333 300.355027 217.217336 196.596426
'PBMC4 - davies_bouldin  from Prob. (lower is better)'
monocle scanpy scvi-tools seurat COTAN antibody
0 3.397778 2.505744 2.407189 2.295438 2.355103 4.520827
'PBMC4 - matching antibody labels'
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.642736 0.462311 0.694650 0.598042 0.558509 0.724062 0.430809
scanpy 0.718317 0.585612 0.758534 0.682150 0.662578 0.798043 0.550107
scvi-tools 0.730060 0.588797 0.763135 0.699732 0.665395 0.803057 0.551331
seurat 0.722468 0.570140 0.785446 0.668840 0.651248 0.815130 0.520315
COTAN 0.675609 0.518293 0.678118 0.673118 0.606106 0.659437 0.557089

Check cellTypist vs Antibody

def compute_clustering_scores(output_dir, dataset):#celltypist_df, antibody_df,
    # Merge the dataframes on the common 'cell' column
    #cotan_df = pd.read_csv(f'{DIR}{dataset}/COTAN/antibody/clustering_labels.csv', index_col=0)
    #display("Cotan clusters objetc dimension ",cotan_df.shape)
    #display("----------------------------------------")

    celltypist_df = pd.read_csv(f'{DIR}{dataset}/celltypist/celltypist_labels.csv', index_col=0)
    celltypist_df.index = celltypist_df.index.str[:-2]
    antibody_df = pd.read_csv(f'{DIR}{dataset}/antibody_annotation/antibody_labels_postproc.csv', index_col=0)
    #antibody_df = labels_df.merge(antibody_df, how='inner', on='cell')
    #all_in_antibody = celltypist_df.index.isin(antibody_df.index).all()
    #all_in_celltypist = antibody_df.index.isin(celltypist_df.index).all()

    #display("All celltypist indices in antibody: ",all_in_antibody, celltypist_df.index.isin(antibody_df.index).sum(),celltypist_df.shape)
    #display("All antibody indices in cellTypist:", all_in_celltypist)

    #display("----------------------------------------")
    
    merged_df = celltypist_df.merge(antibody_df, how='inner',left_index=True, right_index=True)# on='cell')

    merged_df.columns = ['cluster_celltypist','cluster_antibody']
    
    # Initialize scores dictionary
    scores = {
        'NMI': normalized_mutual_info_score(merged_df['cluster_celltypist'], merged_df['cluster_antibody'], average_method='arithmetic'),
        'ARI': adjusted_rand_score(merged_df['cluster_celltypist'], merged_df['cluster_antibody']),
        'Homogeneity': homogeneity_score(merged_df['cluster_celltypist'], merged_df['cluster_antibody']),
        'Completeness': completeness_score(merged_df['cluster_celltypist'], merged_df['cluster_antibody']),
        'Fowlkes_Mallows': fowlkes_mallows_score(merged_df['cluster_celltypist'], merged_df['cluster_antibody'])
    }
    
    # Convert scores to DataFrame
    scores_df = pd.DataFrame([scores])
    
    # Save scores to CSV and LaTeX
    #scores_df.to_csv(f'{output_dir}{dataset}/clustering_comparison_scores.csv')
    #scores_df.to_latex(f'{output_dir}{dataset}/clustering_comparison_scores.tex')
    
    # Display scores DataFrame
    display(scores_df)
for dataset in DATASET_NAMES:
    #display('------------------------------')
    display(f'{dataset} - Clustering Comparison between CellTypist and Antibody')

    # Assuming celltypist_df and antibody_df are defined elsewhere and available here
    compute_clustering_scores(DIR, dataset)
'PBMC1 - Clustering Comparison between CellTypist and Antibody'
NMI ARI Homogeneity Completeness Fowlkes_Mallows
0 0.752326 0.731095 0.708308 0.802178 0.78126
'PBMC2 - Clustering Comparison between CellTypist and Antibody'
NMI ARI Homogeneity Completeness Fowlkes_Mallows
0 0.659259 0.481537 0.667725 0.651004 0.585734
'PBMC3 - Clustering Comparison between CellTypist and Antibody'
NMI ARI Homogeneity Completeness Fowlkes_Mallows
0 0.693433 0.555502 0.693429 0.693436 0.618105
'PBMC4 - Clustering Comparison between CellTypist and Antibody'
NMI ARI Homogeneity Completeness Fowlkes_Mallows
0 0.751294 0.7252 0.728817 0.775201 0.776972

Summary

External measures

def load_scores(tuning, dataset):
    scores = pd.read_csv(f'{DIR}{dataset}/scores_{tuning}.csv')
    scores = scores.rename(columns={"Unnamed: 0": "tool"})
    scores['tuning'] = tuning
    return scores
datasets = ['PBMC1', 'PBMC2', 'PBMC3', 'PBMC4']
tunings = ['default_celltypist', 'default_antibody', 'celltypist_celltypist', 'antibody_antibody']

scores_list = []

# Concatenate all scores into one DataFrame
for dataset in datasets:
    for tuning in tunings:
        scores = load_scores(tuning, dataset)
        scores['dataset'] = dataset
        scores_list.append(scores)

all_scores = pd.concat(scores_list)

# Prepare data for plotting
all_scores_melted = all_scores.melt(id_vars=['tool', 'tuning', 'dataset'], var_name='score', value_name='value')

sns.set_context("talk") 
# Define custom colors
custom_palette = { 
    "seurat": "#4575B4",
    "monocle": "#DAABE9",
    "scanpy": "#7F9B5C",
    "COTAN": "#F73604",
    "scvi-tools": "#B6A18F"
}

g = sns.FacetGrid(all_scores_melted, row='score', col='tuning', sharey=False, height=4, aspect=1.3)
g.map(sns.pointplot, 'tool', 'value', palette=custom_palette,capsize=0.2, errwidth=2)

# Set titles and labels
g.set_titles(col_template="{col_name}", row_template="{row_name}")
g.set_axis_labels("Tool", "Score Value")
plt.subplots_adjust(top=1.4)
#g.fig.suptitle('Comparison of Clustering Tools by Various Scores and Conditions')
# Rotate x-axis labels
for ax in g.axes.flatten():
    plt.setp(ax.get_xticklabels(), rotation=45)

g.savefig("ClusteringToolsComparison.pdf")
plt.show()

Internal measures

# Load your data (assuming you have CSV files for the scores)
def load_scores(tuning, dataset, score_type):
    file_path = f'{DIR}{dataset}/{tuning}_{score_type}.csv'
    print(f"Loading {file_path}")
    scores = pd.read_csv(file_path, header=0)  # Read the CSV file without an index column
    scores_melted = scores.melt(var_name='tool', value_name='value')
    scores_melted['tuning'] = tuning
    scores_melted['dataset'] = dataset
    scores_melted['score_type'] = score_type
    return scores_melted

datasets = ['PBMC1', 'PBMC2', 'PBMC3', 'PBMC4']
tunings = ['default', 'celltypist', 'antibody']
score_types = ['silhouette', 'davies_bouldin','Calinski_Harabasz','silhouette_fromProb', 'davies_bouldin_fromProb','Calinski_Harabasz_fromProb']
scores_list = []

# Concatenate all scores into one DataFrame
for dataset in datasets:
    for tuning in tunings:
        for score_type in score_types:
            scores = load_scores(tuning, dataset, score_type)
            scores_list.append(scores)

all_scores = pd.concat(scores_list)

# Debug: Check the loaded data
print(all_scores.head())

# Define custom colors
custom_palette = {
    "seurat": "#4575B4",
    "monocle": "#DAABE9",
    "scanpy": "#7F9B5C",
    "COTAN": "#F73604",
    "scvi-tools": "#B6A18F"
}

# Filter for silhouette and davies_bouldin scores
silhouette_scores = all_scores[all_scores['score_type'] == 'silhouette']
davies_bouldin_scores = all_scores[all_scores['score_type'] == 'davies_bouldin']
Calinski_Harabasz_scores = all_scores[all_scores['score_type'] == 'Calinski_Harabasz']
silhouette_scores_fromProb = all_scores[all_scores['score_type'] == 'silhouette_fromProb']
davies_bouldin_scores_fromProb = all_scores[all_scores['score_type'] == 'davies_bouldin_fromProb']
Calinski_Harabasz_scores_fromProb = all_scores[all_scores['score_type'] == 'Calinski_Harabasz_fromProb']

# Plot Silhouette scores
g1 = sns.FacetGrid(silhouette_scores, col='tuning', sharey=False, height=4, aspect=1.8)
g1.map(sns.pointplot, 'tool', 'value', palette=custom_palette, order=[ "monocle", "scanpy", "scvi-tools","seurat","COTAN"],capsize=0.2, errwidth=2)
g1.set_titles(col_template="{col_name}")
g1.set_axis_labels("Tool", "Silhouette Score")
g1.fig.suptitle('Silhouette Scores by Tool and Tuning Condition', y=1.25)
plt.subplots_adjust(top=0.85)
# Rotate x-axis labels
for ax in g1.axes.flatten():
    plt.setp(ax.get_xticklabels(), rotation=45)

# Plot Davies-Bouldin scores
g2 = sns.FacetGrid(davies_bouldin_scores, col='tuning', sharey=False, height=4, aspect=1.8)
g2.map(sns.pointplot, 'tool', 'value', palette=custom_palette, order=["monocle", "scanpy",  "scvi-tools","seurat","COTAN"],capsize=0.2, errwidth=2)
g2.set_titles(col_template="{col_name}")
g2.set_axis_labels("Tool", "Davies-Bouldin Score")
g2.fig.suptitle('Davies-Bouldin Scores by Tool and Tuning Condition', y=1.85)
plt.subplots_adjust(top=1.5)
# Rotate x-axis labels
for ax in g2.axes.flatten():
    plt.setp(ax.get_xticklabels(), rotation=45)

# Plot Calinski_Harabasz scores
g3 = sns.FacetGrid(Calinski_Harabasz_scores, col='tuning', sharey=False, height=4, aspect=1.8)
g3.map(sns.pointplot, 'tool', 'value', palette=custom_palette, order=["monocle", "scanpy",  "scvi-tools","seurat","COTAN"],capsize=0.2, errwidth=2)
g3.set_titles(col_template="{col_name}")
g3.set_axis_labels("Tool", "Calinski_Harabasz Score")
g3.fig.suptitle('Calinski Harabasz Scores by Tool and Tuning Condition', y=1.85)
plt.subplots_adjust(top=1.5)
# Rotate x-axis labels
for ax in g3.axes.flatten():
    plt.setp(ax.get_xticklabels(), rotation=45)


# Plot Silhouette scores
g4 = sns.FacetGrid(silhouette_scores_fromProb, col='tuning', sharey=False, height=4, aspect=1.8)
g4.map(sns.pointplot, 'tool', 'value', palette=custom_palette, order=[ "monocle", "scanpy", "scvi-tools","seurat","COTAN"],capsize=0.2, errwidth=2)
g4.set_titles(col_template="{col_name}")
g4.set_axis_labels("Tool", "Silhouette Score")
g4.fig.suptitle('Silhouette Scores From Prob. by Tool and Tuning Condition', y=1.25)
plt.subplots_adjust(top=0.85)
# Rotate x-axis labels
for ax in g4.axes.flatten():
    plt.setp(ax.get_xticklabels(), rotation=45)

# Plot Davies-Bouldin scores
g5 = sns.FacetGrid(davies_bouldin_scores_fromProb, col='tuning', sharey=False, height=4, aspect=1.8)
g5.map(sns.pointplot, 'tool', 'value', palette=custom_palette, order=["monocle", "scanpy",  "scvi-tools","seurat","COTAN"],capsize=0.2, errwidth=2)
g5.set_titles(col_template="{col_name}")
g5.set_axis_labels("Tool", "Davies-Bouldin Score")
g5.fig.suptitle('Davies-Bouldin Scores From Prob. by Tool and Tuning Condition', y=1.85)
plt.subplots_adjust(top=1.5)
# Rotate x-axis labels
for ax in g5.axes.flatten():
    plt.setp(ax.get_xticklabels(), rotation=45)

# Plot Calinski_Harabasz scores
g6 = sns.FacetGrid(Calinski_Harabasz_scores_fromProb, col='tuning', sharey=False, height=4, aspect=1.8)
g6.map(sns.pointplot, 'tool', 'value', palette=custom_palette, order=["monocle", "scanpy",  "scvi-tools","seurat","COTAN"],capsize=0.2, errwidth=2)
g6.set_titles(col_template="{col_name}")
g6.set_axis_labels("Tool", "Calinski_Harabasz Score")
g6.fig.suptitle('Calinski Harabasz Scores From Prob. by Tool and Tuning Condition', y=1.85)
plt.subplots_adjust(top=1.5)
# Rotate x-axis labels
for ax in g6.axes.flatten():
    plt.setp(ax.get_xticklabels(), rotation=45)


g1.savefig("Silhouette.pdf")
g2.savefig("Calinski_Harabasz.pdf")
g3.savefig("Davies_Bouldin.pdf")

g4.savefig("SilhouetteFromProb.pdf")
g5.savefig("Calinski_HarabaszFromProb.pdf")
g6.savefig("Davies_BouldinFromProb.pdf")

plt.show()
Loading Data/PBMC1/default_silhouette.csv
Loading Data/PBMC1/default_davies_bouldin.csv
Loading Data/PBMC1/default_Calinski_Harabasz.csv
Loading Data/PBMC1/default_silhouette_fromProb.csv
Loading Data/PBMC1/default_davies_bouldin_fromProb.csv
Loading Data/PBMC1/default_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC1/celltypist_silhouette.csv
Loading Data/PBMC1/celltypist_davies_bouldin.csv
Loading Data/PBMC1/celltypist_Calinski_Harabasz.csv
Loading Data/PBMC1/celltypist_silhouette_fromProb.csv
Loading Data/PBMC1/celltypist_davies_bouldin_fromProb.csv
Loading Data/PBMC1/celltypist_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC1/antibody_silhouette.csv
Loading Data/PBMC1/antibody_davies_bouldin.csv
Loading Data/PBMC1/antibody_Calinski_Harabasz.csv
Loading Data/PBMC1/antibody_silhouette_fromProb.csv
Loading Data/PBMC1/antibody_davies_bouldin_fromProb.csv
Loading Data/PBMC1/antibody_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC2/default_silhouette.csv
Loading Data/PBMC2/default_davies_bouldin.csv
Loading Data/PBMC2/default_Calinski_Harabasz.csv
Loading Data/PBMC2/default_silhouette_fromProb.csv
Loading Data/PBMC2/default_davies_bouldin_fromProb.csv
Loading Data/PBMC2/default_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC2/celltypist_silhouette.csv
Loading Data/PBMC2/celltypist_davies_bouldin.csv
Loading Data/PBMC2/celltypist_Calinski_Harabasz.csv
Loading Data/PBMC2/celltypist_silhouette_fromProb.csv
Loading Data/PBMC2/celltypist_davies_bouldin_fromProb.csv
Loading Data/PBMC2/celltypist_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC2/antibody_silhouette.csv
Loading Data/PBMC2/antibody_davies_bouldin.csv
Loading Data/PBMC2/antibody_Calinski_Harabasz.csv
Loading Data/PBMC2/antibody_silhouette_fromProb.csv
Loading Data/PBMC2/antibody_davies_bouldin_fromProb.csv
Loading Data/PBMC2/antibody_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC3/default_silhouette.csv
Loading Data/PBMC3/default_davies_bouldin.csv
Loading Data/PBMC3/default_Calinski_Harabasz.csv
Loading Data/PBMC3/default_silhouette_fromProb.csv
Loading Data/PBMC3/default_davies_bouldin_fromProb.csv
Loading Data/PBMC3/default_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC3/celltypist_silhouette.csv
Loading Data/PBMC3/celltypist_davies_bouldin.csv
Loading Data/PBMC3/celltypist_Calinski_Harabasz.csv
Loading Data/PBMC3/celltypist_silhouette_fromProb.csv
Loading Data/PBMC3/celltypist_davies_bouldin_fromProb.csv
Loading Data/PBMC3/celltypist_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC3/antibody_silhouette.csv
Loading Data/PBMC3/antibody_davies_bouldin.csv
Loading Data/PBMC3/antibody_Calinski_Harabasz.csv
Loading Data/PBMC3/antibody_silhouette_fromProb.csv
Loading Data/PBMC3/antibody_davies_bouldin_fromProb.csv
Loading Data/PBMC3/antibody_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC4/default_silhouette.csv
Loading Data/PBMC4/default_davies_bouldin.csv
Loading Data/PBMC4/default_Calinski_Harabasz.csv
Loading Data/PBMC4/default_silhouette_fromProb.csv
Loading Data/PBMC4/default_davies_bouldin_fromProb.csv
Loading Data/PBMC4/default_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC4/celltypist_silhouette.csv
Loading Data/PBMC4/celltypist_davies_bouldin.csv
Loading Data/PBMC4/celltypist_Calinski_Harabasz.csv
Loading Data/PBMC4/celltypist_silhouette_fromProb.csv
Loading Data/PBMC4/celltypist_davies_bouldin_fromProb.csv
Loading Data/PBMC4/celltypist_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC4/antibody_silhouette.csv
Loading Data/PBMC4/antibody_davies_bouldin.csv
Loading Data/PBMC4/antibody_Calinski_Harabasz.csv
Loading Data/PBMC4/antibody_silhouette_fromProb.csv
Loading Data/PBMC4/antibody_davies_bouldin_fromProb.csv
Loading Data/PBMC4/antibody_Calinski_Harabasz_fromProb.csv
         tool     value   tuning dataset  score_type
0  Unnamed: 0  0.000000  default   PBMC1  silhouette
1     monocle  0.106025  default   PBMC1  silhouette
2      scanpy  0.062012  default   PBMC1  silhouette
3  scvi-tools  0.087622  default   PBMC1  silhouette
4      seurat  0.168956  default   PBMC1  silhouette